diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet index 1341b6c4d79..1eb52962b56 100644 --- a/.drone/drone.jsonnet +++ b/.drone/drone.jsonnet @@ -44,9 +44,9 @@ local docker_config_json_secret = secret('dockerconfigjson', 'secret/data/common // secret needed for dep-tools local gh_token_secret = secret('gh_token', 'infra/data/ci/github/grafanabot', 'pat'); -local tempo_app_id_secret = secret('tempo_app_id_secret', 'ci/data/repo/grafana/tempo/github-app', 'app-id'); -local tempo_app_installation_id_secret = secret('tempo_app_installation_id_secret', 'ci/data/repo/grafana/tempo/github-app', 'app-installation-id'); -local tempo_app_private_key_secret = secret('tempo_app_private_key_secret', 'ci/data/repo/grafana/tempo/github-app', 'app-private-key'); +local tempo_app_id_secret = secret('tempo_app_id_secret', 'infra/data/ci/tempo/github-app', 'app-id'); +local tempo_app_installation_id_secret = secret('tempo_app_installation_id_secret', 'infra/data/ci/tempo/github-app', 'app-installation-id'); +local tempo_app_private_key_secret = secret('tempo_app_private_key_secret', 'infra/data/ci/tempo/github-app', 'app-private-key'); // secret to sign linux packages local gpg_passphrase = secret('gpg_passphrase', 'infra/data/ci/packages-publish/gpg', 'passphrase'); @@ -302,12 +302,17 @@ local deploy_to_dev() = { for d in aws_serverless_deployments ], }, + + local ghTokenFilename = '/drone/src/gh-token.txt'; // Build and release packages // Tested by installing the packages on a systemd container pipeline('release') { trigger: { event: ['tag', 'pull_request'], }, + image_pull_secrets: [ + docker_config_json_secret.name, + ], volumes+: [ { name: 'cgroup', @@ -353,6 +358,18 @@ local deploy_to_dev() = { image: 'docker:git', commands: ['git fetch --tags'], }, + { + name: 'Generate GitHub token', + image: 'us.gcr.io/kubernetes-dev/github-app-secret-writer:latest', + environment: { + GITHUB_APP_ID: { from_secret: tempo_app_id_secret.name }, + GITHUB_APP_INSTALLATION_ID: { 
from_secret: tempo_app_installation_id_secret.name }, + GITHUB_APP_PRIVATE_KEY: { from_secret: tempo_app_private_key_secret.name }, + }, + commands: [ + '/usr/bin/github-app-external-token > %s' % ghTokenFilename, + ], + }, { name: 'write-key', image: 'golang:1.23', @@ -398,7 +415,10 @@ local deploy_to_dev() = { { name: 'release', image: 'golang:1.23', - commands: ['make release'], + commands: [ + 'export GITHUB_TOKEN=$(cat %s)' % ghTokenFilename, + 'make release' + ], environment: { NFPM_DEFAULT_PASSPHRASE: { from_secret: gpg_passphrase.name }, NFPM_SIGNING_KEY_FILE: '/drone/src/private-key.key', diff --git a/.drone/drone.yml b/.drone/drone.yml index de908ebfcde..2cf4926f943 100644 --- a/.drone/drone.yml +++ b/.drone/drone.yml @@ -420,6 +420,8 @@ trigger: - refs/heads/r??? --- depends_on: [] +image_pull_secrets: +- dockerconfigjson kind: pipeline name: release platform: @@ -443,6 +445,17 @@ steps: - git fetch --tags image: docker:git name: fetch +- commands: + - /usr/bin/github-app-external-token > /drone/src/gh-token.txt + environment: + GITHUB_APP_ID: + from_secret: tempo_app_id_secret + GITHUB_APP_INSTALLATION_ID: + from_secret: tempo_app_installation_id_secret + GITHUB_APP_PRIVATE_KEY: + from_secret: tempo_app_private_key_secret + image: us.gcr.io/kubernetes-dev/github-app-secret-writer:latest + name: Generate GitHub token - commands: - printf "%s" "$NFPM_SIGNING_KEY" > $NFPM_SIGNING_KEY_FILE environment: @@ -476,6 +489,7 @@ steps: - name: docker path: /var/run/docker.sock - commands: + - export GITHUB_TOKEN=$(cat /drone/src/gh-token.txt) - make release environment: NFPM_DEFAULT_PASSPHRASE: @@ -524,19 +538,19 @@ name: gh_token --- get: name: app-id - path: ci/data/repo/grafana/tempo/github-app + path: infra/data/ci/tempo/github-app kind: secret name: tempo_app_id_secret --- get: name: app-installation-id - path: ci/data/repo/grafana/tempo/github-app + path: infra/data/ci/tempo/github-app kind: secret name: tempo_app_installation_id_secret --- get: name: 
app-private-key - path: ci/data/repo/grafana/tempo/github-app + path: infra/data/ci/tempo/github-app kind: secret name: tempo_app_private_key_secret --- @@ -583,6 +597,6 @@ kind: secret name: gpg_passphrase --- kind: signature -hmac: 89985402118d8a96c616f0a26c40b8b5e6fda8e904231dbe76f709cbf9603a8a +hmac: 0265cd585d8c7fc444bebc8aa1164ec6aa7893c2aa16f3beb61503102b00a798 ... diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29c1bcaa96e..e9766f45700 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,6 +79,8 @@ jobs: steps: - name: Check out code uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow --tags - name: Set up Go uses: actions/setup-go@v5 with: diff --git a/.github/workflows/drone-signature-check.yml b/.github/workflows/drone-signature-check.yml new file mode 100644 index 00000000000..6c07976761e --- /dev/null +++ b/.github/workflows/drone-signature-check.yml @@ -0,0 +1,17 @@ +name: Check Drone CI Signature + +on: + push: + branches: + - "main" + paths: + - ".drone/drone.yml" + pull_request: + paths: + - ".drone/drone.yml" + +jobs: + drone-signature-check: + uses: grafana/shared-workflows/.github/workflows/check-drone-signature.yaml@main + with: + drone_config_path: .drone/drone.yml diff --git a/.github/workflows/milestoned_to_project.yml b/.github/workflows/milestoned_to_project.yml index 34336ca33da..2198b8c24b2 100644 --- a/.github/workflows/milestoned_to_project.yml +++ b/.github/workflows/milestoned_to_project.yml @@ -19,7 +19,7 @@ jobs: app-id: ${{secrets.APP_ID}} private-key: ${{secrets.APP_PRIVATE_KEY}} owner: ${{ github.repository_owner }} - - uses: actions/add-to-project@v1.0.0 + - uses: actions/add-to-project@v1.0.2 with: project-url: https://github.com/orgs/grafana/projects/135 github-token: ${{ steps.get-github-app-token.outputs.token }} diff --git a/.github/workflows/sbom-report.yml b/.github/workflows/sbom-report.yml index ed1748a641c..7a5638da07f 100644 --- 
a/.github/workflows/sbom-report.yml +++ b/.github/workflows/sbom-report.yml @@ -14,7 +14,7 @@ jobs: uses: actions/checkout@v4 - name: Anchore SBOM Action - uses: anchore/sbom-action@v0.17.2 + uses: anchore/sbom-action@v0.17.5 with: artifact-name: ${{ github.event.repository.name }}-spdx.json diff --git a/.gitignore b/.gitignore index 2d6dc7eff7b..d46261d6155 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ .idea .vscode *.test +*.out +*.pprof /bin /cmd/tempo-cli/tempo-cli /cmd/tempo-query/tempo-query @@ -19,5 +21,7 @@ /tempodb/encoding/benchmark_block private-key.key integration/e2e/e2e_integration_test[0-9]* +integration/e2e/deployments/e2e_integration_test[0-9]* .tempo.yaml /tmp +gh-token.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cb4177f43c..f5a405c1291 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,23 +1,22 @@ ## main / unreleased - +* [CHANGE] **BREAKING CHANGE** Change the AWS Lambda serverless build tooling output from "main" to "bootstrap". Refer to https://aws.amazon.com/blogs/compute/migrating-aws-lambda-functions-from-the-go1-x-runtime-to-the-custom-runtime-on-amazon-linux-2/ for migration steps [#3852](https://github.com/grafana/tempo/pull/3852) (@zatlodan) +* [ENHANCEMENT] The span multiplier now also sources its value from the resource attributes. [#4210](https://github.com/grafana/tempo/pull/4210) +* [FEATURE] Export cost attribution usage metrics from distributor [#4162](https://github.com/grafana/tempo/pull/4162) (@mdisibio) +* [ENHANCEMENT] Changed log level from INFO to DEBUG for the TempoDB Find operation using traceId to reduce excessive/unwanted logs in log search. 
[#4179](https://github.com/grafana/tempo/pull/4179) (@Aki0x137) +* [ENHANCEMENT] Pushdown collection of results from generators in the querier [#4119](https://github.com/grafana/tempo/pull/4119) (@electron0zero) +* [CHANGE] Add throughput and SLO metrics in the tags and tag values endpoints [#4148](https://github.com/grafana/tempo/pull/4148) (@electron0zero) * [CHANGE] tempo-cli: add support for /api/v2/traces endpoint [#4127](https://github.com/grafana/tempo/pull/4127) (@electron0zero) **BREAKING CHANGE** The `tempo-cli` now uses the `/api/v2/traces` endpoint by default, please use `--v1` flag to use `/api/traces` endpoint, which was the default in previous versions. -* [ENHANCEMENT] Speedup collection of results from ingesters in the querier [#4100](https://github.com/grafana/tempo/pull/4100) (@electron0zero) -* [ENHANCEMENT] Speedup DistinctValue collector and exit early for ingesters [#4104](https://github.com/grafana/tempo/pull/4104) (@electron0zero) -* [ENHANCEMENT] Add disk caching in ingester SearchTagValuesV2 for completed blocks [#4069](https://github.com/grafana/tempo/pull/4069) (@electron0zero) -* [BUGFIX] Replace hedged requests roundtrips total with a counter. [#4063](https://github.com/grafana/tempo/pull/4063) [#4078](https://github.com/grafana/tempo/pull/4078) (@galalen) -* [BUGFIX] Metrics generators: Correctly drop from the ring before stopping ingestion to reduce drops during a rollout. [#4101](https://github.com/grafana/tempo/pull/4101) (@joe-elliott) -* [BUGFIX] Bring back application-json content-type header. [#4121](https://github.com/grafana/tempo/pull/4121) (@javiermolinar) -* [BUGFIX] Correctly handle 400 Bad Request and 404 Not Found in gRPC streaming [#4144](https://github.com/grafana/tempo/pull/4144) (@mapno) -* [BUGFIX] Pushes a 0 to classic histogram's counter when the series is new to allow Prometheus to start from a non-null value. 
[#4140](https://github.com/grafana/tempo/pull/4140) (@mapno) * [CHANGE] TraceByID: don't allow concurrent_shards greater than query_shards. [#4074](https://github.com/grafana/tempo/pull/4074) (@electron0zero) -* **BREAKING CHANGE** tempo-query is no longer a jaeger instance with grpcPlugin. Its now a standalone server. Serving a grpc api for jaeger on `0.0.0.0:7777` by default. [#3840](https://github.com/grafana/tempo/issues/3840) (@frzifus) * [CHANGE] **BREAKING CHANGE** The dynamic injection of X-Scope-OrgID header for metrics generator remote-writes is changed. If the header is aleady set in per-tenant overrides or global tempo configuration, then it is honored and not overwritten. [#4021](https://github.com/grafana/tempo/pull/4021) (@mdisibio) * [CHANGE] **BREAKING CHANGE** Migrate from OpenTracing to OpenTelemetry instrumentation. Removed the `use_otel_tracer` configuration option. Use the OpenTelemetry environment variables to configure the span exporter [#3646](https://github.com/grafana/tempo/pull/3646) (@andreasgerstmayr) To continue using the Jaeger exporter, use the following environment variable: `OTEL_TRACES_EXPORTER=jaeger`. * [CHANGE] No longer send the final diff in GRPC streaming. Instead we rely on the streamed intermediate results. 
[#4062](https://github.com/grafana/tempo/pull/4062) (@joe-elliott) * [CHANGE] Update Go to 1.23.1 [#4146](https://github.com/grafana/tempo/pull/4146) [#4147](https://github.com/grafana/tempo/pull/4147) (@javiermolinar) +* [CHANGE] TraceQL: Add range condition for byte predicates [#4198](https://github.com/grafana/tempo/pull/4198) (@ie-pham) +* [CHANGE] Return 422 for TRACE_TOO_LARGE queries [#4160](https://github.com/grafana/tempo/pull/4160) (@zalegrala) +* [CHANGE] Upgrade OTEL sdk to reduce allocs [#4243](https://github.com/grafana/tempo/pull/4243) (@joe-elliott) * [FEATURE] Discarded span logging `log_discarded_spans` [#3957](https://github.com/grafana/tempo/issues/3957) (@dastrobu) * [FEATURE] TraceQL support for instrumentation scope [#3967](https://github.com/grafana/tempo/pull/3967) (@ie-pham) * [ENHANCEMENT] TraceQL: Attribute iterators collect matched array values [#3867](https://github.com/grafana/tempo/pull/3867) (@electron0zero, @stoewer) @@ -27,11 +26,39 @@ **BREAKING CHANGE** Removed `querier_forget_delay` setting from the frontend. This configuration option did nothing. * [ENHANCEMENT] Update metrics-generator config in Tempo distributed docker compose example to serve TraceQL metrics [#4003](https://github.com/grafana/tempo/pull/4003) (@javiermolinar) * [ENHANCEMENT] Reduce allocs related to marshalling dedicated columns repeatedly in the query frontend. 
[#4007](https://github.com/grafana/tempo/pull/4007) (@joe-elliott) +* [ENHANCEMENT] Improve performance of TraceQL queries [#4114](https://github.com/grafana/tempo/pull/4114) (@mdisibio) +* [ENHANCEMENT] Improve performance of TraceQL queries [#4163](https://github.com/grafana/tempo/pull/4163) (@mdisibio) +* [ENHANCEMENT] Reduce memory usage of classic histograms in the span-metrics and service-graphs processors [#4232](https://github.com/grafana/tempo/pull/4232) (@mdisibio) * [ENHANCEMENT] Implement simple Fetch by key for cache items [#4032](https://github.com/grafana/tempo/pull/4032) (@javiermolinar) * [ENHANCEMENT] Replace Grafana Agent example by Grafana Alloy[#4030](https://github.com/grafana/tempo/pull/4030) (@javiermolinar) * [ENHANCEMENT] Support exporting internal Tempo traces via OTLP exporter when `use_otel_tracer` is enabled. Use the OpenTelemetry SDK environment variables to configure the span exporter. [#4028](https://github.com/grafana/tempo/pull/4028) (@andreasgerstmayr) * [ENHANCEMENT] TraceQL metrics queries: add min_over_time [#3975](https://github.com/grafana/tempo/pull/3975) (@javiermolinar) +* [ENHANCEMENT] TraceQL metrics queries: add max_over_time [#4065](https://github.com/grafana/tempo/pull/4065) (@javiermolinar) * [ENHANCEMENT] Write tenantindex as proto and json with a prefernce for proto [#4072](https://github.com/grafana/tempo/pull/4072) (@zalegrala) +* [ENHANCEMENT] Pool zstd encoding/decoding for tempodb/backend [#4208](https://github.com/grafana/tempo/pull/4208) (@zalegrala) +* [ENHANCEMENT] The span multiplier now also sources its value from the resource attributes. [#4210](https://github.com/grafana/tempo/pull/4210) +* [ENHANCEMENT] Changed log level from INFO to DEBUG for the TempoDB Find operation using traceId to reduce excessive/unwanted logs in log search. 
[#4179](https://github.com/grafana/tempo/pull/4179) (@Aki0x137) +* [ENHANCEMENT] Pushdown collection of results from generators in the querier [#4119](https://github.com/grafana/tempo/pull/4119) (@electron0zero) +* [ENHANCEMENT] Send semver version in api/status/buildinfo for cloud deployments [#4110](https://github.com/grafana/tempo/pull/4110) (@Aki0x137) +* [ENHANCEMENT] Speedup DistinctString and ScopedDistinctString collectors [#4109](https://github.com/grafana/tempo/pull/4109) (@electron0zero) +* [ENHANCEMENT] Speedup collection of results from ingesters in the querier [#4100](https://github.com/grafana/tempo/pull/4100) (@electron0zero) +* [ENHANCEMENT] Speedup DistinctValue collector and exit early for ingesters [#4104](https://github.com/grafana/tempo/pull/4104) (@electron0zero) +* [ENHANCEMENT] Add disk caching in ingester SearchTagValuesV2 for completed blocks [#4069](https://github.com/grafana/tempo/pull/4069) (@electron0zero) +* [BUGFIX] Replace hedged requests roundtrips total with a counter. [#4063](https://github.com/grafana/tempo/pull/4063) [#4078](https://github.com/grafana/tempo/pull/4078) (@galalen) +* [BUGFIX] Metrics generators: Correctly drop from the ring before stopping ingestion to reduce drops during a rollout. [#4101](https://github.com/grafana/tempo/pull/4101) (@joe-elliott) +* [BUGFIX] Correctly handle 400 Bad Request and 404 Not Found in gRPC streaming [#4144](https://github.com/grafana/tempo/pull/4144) (@mapno) +* [BUGFIX] Pushes a 0 to classic histogram's counter when the series is new to allow Prometheus to start from a non-null value. [#4140](https://github.com/grafana/tempo/pull/4140) (@mapno) +* [BUGFIX] Fix counter samples being downsampled by backdating the initial sample to the previous minute when the series is new [#4236](https://github.com/grafana/tempo/pull/4236) (@javiermolinar) +* [BUGFIX] Skip computing exemplars for instant queries. 
[#4204](https://github.com/grafana/tempo/pull/4204) (@javiermolinar) + +# v2.6.1 + +* [CHANGE] **BREAKING CHANGE** tempo-query is no longer a Jaeger instance with grpcPlugin. It's now a standalone server. Serving a gRPC API for Jaeger on `0.0.0.0:7777` by default. [#3840](https://github.com/grafana/tempo/issues/3840) (@frzifus) +* [ENHANCEMENT] Register gRPC health server to tempo-query. [#4178](https://github.com/grafana/tempo/pull/4178) (@frzifus) +* [ENHANCEMENT] Support Tempo on IBM s390x. [#4175](https://github.com/grafana/tempo/pull/4175) (@pavolloffay) +* [ENHANCEMENT] tempo-query: Separate TLS settings for server and client. [#4177](https://github.com/grafana/tempo/pull/4177) (@frzifus) +* [ENHANCEMENT] Speedup tempo-query trace search by allowing parallel queries. [#4159](https://github.com/grafana/tempo/pull/4159) (@pavolloffay) +* [BUGFIX] Bring back application-json content-type header. [#4123](https://github.com/grafana/tempo/pull/4123) (@javiermolinar) # v2.6.0 diff --git a/CODEOWNERS b/CODEOWNERS index 7e5fb7c548f..2ba415d7043 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -11,15 +11,15 @@ # In each subsection folders are ordered first by depth, then alphabetically. # This should make it easy to add new rules without breaking existing ones. 
-* @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +* @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer @javiermolinar -/docs/ @knylander-grafana @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +/docs/ @knylander-grafana @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer -/.github/backport.yml @jdbaldry @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer -/.github/update-make-docs.yml @jdbaldry @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer -/.github/website-next.yml @jdbaldry @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer -/.github/website-versioned.yml @jdbaldry @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer -/docs/docs.mk @jdbaldry @knylander-grafana @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer -/docs/make-docs @jdbaldry @knylander-grafana @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer -/docs/Makefile @jdbaldry @knylander-grafana @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer -/docs/variables.mk @jdbaldry @knylander-grafana @joe-elliott @annanay25 @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +/.github/backport.yml @jdbaldry @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +/.github/update-make-docs.yml @jdbaldry @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +/.github/website-next.yml @jdbaldry @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +/.github/website-versioned.yml @jdbaldry @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer 
+/docs/docs.mk @jdbaldry @knylander-grafana @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +/docs/make-docs @jdbaldry @knylander-grafana @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +/docs/Makefile @jdbaldry @knylander-grafana @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer +/docs/variables.mk @jdbaldry @knylander-grafana @joe-elliott @mdisibio @mapno @kvrhdn @zalegrala @electron0zero @ie-pham @stoewer diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 77aafbd1af2..29f97278808 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -46,9 +46,9 @@ In case a member leaves, the [offboarding](#offboarding) procedure is applied. The current team members are: -- Annanay Agarwal - [annanay25](https://github.com/annanay25) ([Grafana Labs](https://grafana.com/)) - Suraj Nath - [electron0zero](https://github.com/electron0zero) ([Grafana Labs](https://grafana.com/)) - Jennie Pham - [ie-pham](https://github.com/ie-pham) ([Grafana Labs](https://grafana.com/)) +- Javi Molina - [javiermolinar](https://github.com/javiermolinar) ([Grafana Labs](https://grafana.com/)) - Joe Elliott - [joe-elliott](https://github.com/joe-elliott) ([Grafana Labs](https://grafana.com/)) - Kim Nylander - [knylander-grafana](https://github.com/knylander-grafana) ([Grafana Labs](https://grafana.com/)) - Koenraad Verheyden - [kvrhdn](https://github.com/kvrhdn) ([Grafana Labs](https://grafana.com/)) @@ -69,6 +69,10 @@ A maintainer or committer may resign by notifying the [team mailing list][team]. A project may have multiple maintainers, as long as the responsibilities are clearly agreed upon between them. This includes coordinating who handles which issues and pull requests. +### Emeritus Maintainers + +Emeritus maintainers are former maintainers who no longer work directly on Tempo on a regular basis. We respect their former contributions by giving them the Emeritus Maintainer title. 
This is honorary only and confers no responsibilities or rights regarding the Tempo project. + ### Technical decisions Technical decisions that only affect a single project are made informally by the maintainer of this project, and [rough consensus](#consensus) is assumed. Technical decisions that span multiple parts of the project should be discussed and made on the [developer mailing list][devs]. diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 16cec25de94..242ca882bcb 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -1,10 +1,15 @@ -* @annanay25 * @electron0zero * @ie-pham +* @javiermolinar * @joe-elliott * @knylander-grafana * @kvrhdn * @mapno * @mdisibio * @stoewer -* @zalegrala \ No newline at end of file +* @zalegrala + +Emeritus Maintainers + +* @annanay25 +* @dgzlopes \ No newline at end of file diff --git a/Makefile b/Makefile index 45f4ae59c6d..74b5569e959 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ help: ## Display this help .DEFAULT_GOAL:=help # Version number -VERSION=$(shell ./tools/image-tag | cut -d, -f 1) +VERSION=$(shell ./tools/version-tag.sh | cut -d, -f 1) GIT_REVISION := $(shell git rev-parse --short HEAD) GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD) diff --git a/cmd/tempo-query/main.go b/cmd/tempo-query/main.go index 22aed3f8d23..8ef235531a9 100644 --- a/cmd/tempo-query/main.go +++ b/cmd/tempo-query/main.go @@ -3,26 +3,31 @@ package main import ( "flag" "net" + "os" "strings" - "github.com/hashicorp/go-hclog" - hcplugin "github.com/hashicorp/go-plugin" "github.com/jaegertracing/jaeger/proto-gen/storage_v1" + zaplogfmt "github.com/jsternberg/zap-logfmt" otgrpc "github.com/opentracing-contrib/go-grpc" "github.com/opentracing/opentracing-go" "github.com/spf13/viper" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" google_grpc "google.golang.org/grpc" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/health" + "google.golang.org/grpc/health/grpc_health_v1" "github.com/grafana/tempo/cmd/tempo-query/tempo" ) func 
main() { - logger := hclog.New(&hclog.LoggerOptions{ - Name: "jaeger-tempo", - Level: hclog.Error, - JSONFormat: true, - }) + config := zap.NewProductionEncoderConfig() + logger := zap.New(zapcore.NewCore( + zaplogfmt.NewEncoder(config), + os.Stdout, + zapcore.InfoLevel, + )) var configPath string flag.StringVar(&configPath, "config", "", "A path to the plugin's configuration file") @@ -37,16 +42,16 @@ func main() { err := v.ReadInConfig() if err != nil { - logger.Error("failed to parse configuration file", "error", err) + logger.Error("failed to parse configuration file", zap.Error(err)) } } cfg := &tempo.Config{} cfg.InitFromViper(v) - backend, err := tempo.New(cfg) + backend, err := tempo.New(logger, cfg) if err != nil { - logger.Error("failed to init tracer backend", "error", err) + logger.Error("failed to init tracer backend", zap.Error(err)) } grpcOpts := []google_grpc.ServerOption{ @@ -54,28 +59,32 @@ func main() { google_grpc.StreamInterceptor(otgrpc.OpenTracingStreamServerInterceptor(opentracing.GlobalTracer())), } - if cfg.TLSEnabled { + if cfg.TLSServerEnabeld { creds, err := credentials.NewClientTLSFromFile(cfg.TLS.CertPath, cfg.TLS.ServerName) if err != nil { - logger.Error("failed to load TLS credentials", "error", err) + logger.Error("failed to load TLS credentials", zap.Error(err)) } else { grpcOpts = append(grpcOpts, google_grpc.Creds(creds)) } } - srv := hcplugin.DefaultGRPCServer(grpcOpts) + srv := google_grpc.NewServer(grpcOpts...) 
storage_v1.RegisterSpanReaderPluginServer(srv, backend) storage_v1.RegisterDependenciesReaderPluginServer(srv, backend) storage_v1.RegisterSpanWriterPluginServer(srv, backend) + healthServer := health.NewServer() + grpc_health_v1.RegisterHealthServer(srv, healthServer) + healthServer.SetServingStatus("", grpc_health_v1.HealthCheckResponse_SERVING) + lis, err := net.Listen("tcp", cfg.Address) if err != nil { - logger.Error("failed to listen", "error", err) + logger.Error("failed to listen", zap.Error(err)) } - logger.Info("Server starts serving", "address", cfg.Address) + logger.Info("Server starts serving", zap.String("address", cfg.Address)) if err := srv.Serve(lis); err != nil { - logger.Error("failed to serve", "error", err) + logger.Error("failed to serve", zap.Error(err)) } } diff --git a/cmd/tempo-query/tempo/config.go b/cmd/tempo-query/tempo/config.go index d995e0fa903..fc3e387fd3b 100644 --- a/cmd/tempo-query/tempo/config.go +++ b/cmd/tempo-query/tempo/config.go @@ -8,12 +8,17 @@ import ( // Config holds the configuration for redbull. type Config struct { - Address string `yaml:"address"` - Backend string `yaml:"backend"` - TLSEnabled bool `yaml:"tls_enabled" category:"advanced"` + Address string `yaml:"address"` + Backend string `yaml:"backend"` + // TLSEnabled enables tls outgoing requests from tempo-query to tempo. + TLSEnabled bool `yaml:"tls_enabled" category:"advanced"` + // TLSServerEnabeld enables tls for incoming requests to the tempo-query API. + TLSServerEnabeld bool `yaml:"tls_server_enabled" category:"advanced"` TLS tls.ClientConfig `yaml:",inline"` TenantHeaderKey string `yaml:"tenant_header_key"` QueryServicesDuration string `yaml:"services_query_duration"` + // FindTracesConcurrentRequests defines how many concurrent requests trace search submits to get a trace. 
+ FindTracesConcurrentRequests int `yaml:"find_traces_concurrent_requests"` } // InitFromViper initializes the options struct with values from Viper @@ -25,6 +30,7 @@ func (c *Config) InitFromViper(v *viper.Viper) { c.Address = address c.Backend = v.GetString("backend") c.TLSEnabled = v.GetBool("tls_enabled") + c.TLSServerEnabeld = v.GetBool("tls_server_enabled") c.TLS.CertPath = v.GetString("tls_cert_path") c.TLS.KeyPath = v.GetString("tls_key_path") c.TLS.CAPath = v.GetString("tls_ca_path") @@ -33,7 +39,11 @@ func (c *Config) InitFromViper(v *viper.Viper) { c.TLS.CipherSuites = v.GetString("tls_cipher_suites") c.TLS.MinVersion = v.GetString("tls_min_version") c.QueryServicesDuration = v.GetString("services_query_duration") + c.FindTracesConcurrentRequests = v.GetInt("find_traces_concurrent_requests") + if c.FindTracesConcurrentRequests <= 0 { // unset or negative: fall back to a single worker, otherwise FindTraces would start none + c.FindTracesConcurrentRequests = 1 + } tenantHeader := v.GetString("tenant_header_key") if tenantHeader == "" { tenantHeader = shared.BearerTokenKey diff --git a/cmd/tempo-query/tempo/plugin.go b/cmd/tempo-query/tempo/plugin.go index b6bc6478188..25708d78275 100644 --- a/cmd/tempo-query/tempo/plugin.go +++ b/cmd/tempo-query/tempo/plugin.go @@ -11,6 +11,7 @@ import ( "os" "strconv" "strings" + "sync" "time" "github.com/go-logfmt/logfmt" @@ -22,6 +23,7 @@ import ( "go.opentelemetry.io/collector/pdata/ptrace" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/propagation" + "go.uber.org/zap" "google.golang.org/grpc/metadata" jaeger "github.com/jaegertracing/jaeger/model" @@ -62,15 +64,17 @@ var ( var tracer = otel.Tracer("cmd/tempo-query/tempo") type Backend struct { - tempoBackend string - tlsEnabled bool - tls tlsCfg.ClientConfig - httpClient *http.Client - tenantHeaderKey string - QueryServicesDuration *time.Duration + logger *zap.Logger + tempoBackend string + tlsEnabled bool + tls tlsCfg.ClientConfig + httpClient *http.Client + tenantHeaderKey string + QueryServicesDuration *time.Duration + findTracesConcurrentRequests 
int } -func New(cfg *Config) (*Backend, error) { +func New(logger *zap.Logger, cfg *Config) (*Backend, error) { httpClient, err := createHTTPClient(cfg) if err != nil { return nil, err @@ -88,12 +92,14 @@ func New(cfg *Config) (*Backend, error) { } return &Backend{ - tempoBackend: cfg.Backend, - tlsEnabled: cfg.TLSEnabled, - tls: cfg.TLS, - httpClient: httpClient, - tenantHeaderKey: cfg.TenantHeaderKey, - QueryServicesDuration: queryServiceDuration, + logger: logger, + tempoBackend: cfg.Backend, + tlsEnabled: cfg.TLSEnabled, + tls: cfg.TLS, + httpClient: httpClient, + tenantHeaderKey: cfg.TenantHeaderKey, + QueryServicesDuration: queryServiceDuration, + findTracesConcurrentRequests: cfg.FindTracesConcurrentRequests, }, nil } @@ -306,6 +312,28 @@ func (b *Backend) GetOperations(ctx context.Context, _ *storage_v1.GetOperations }, nil } +type job struct { + ctx context.Context + traceID jaeger.TraceID +} + +type jobResult struct { + traceID jaeger.TraceID + trace *jaeger.Trace + err error +} + +func worker(b *Backend, jobs <-chan job, results chan<- jobResult) { + for job := range jobs { + jaegerTrace, err := b.getTrace(job.ctx, job.traceID) + results <- jobResult{ + traceID: job.traceID, + trace: jaegerTrace, + err: err, + } + } +} + func (b *Backend) FindTraces(req *storage_v1.FindTracesRequest, stream storage_v1.SpanReaderPlugin_FindTracesServer) error { ctx, span := tracer.Start(stream.Context(), "tempo-query.FindTraces") defer span.End() @@ -316,19 +344,46 @@ func (b *Backend) FindTraces(req *storage_v1.FindTracesRequest, stream storage_v } span.AddEvent(fmt.Sprintf("Found %d trace IDs", len(resp.TraceIDs))) + b.logger.Info("FindTraces: fetching traces", zap.Int("traceids", len(resp.TraceIDs))) + + numWorkers := b.findTracesConcurrentRequests + jobs := make(chan job, len(resp.TraceIDs)) + results := make(chan jobResult, len(resp.TraceIDs)) + var workersDone sync.WaitGroup + // Start workers + for w := 0; w < numWorkers; w++ { + workersDone.Add(1) + go func() { 
defer workersDone.Done(); worker(b, jobs, results) }() + } // for every traceID, get the full trace var jaegerTraces []*jaeger.Trace for _, traceID := range resp.TraceIDs { - trace, err := b.getTrace(ctx, traceID) - if err != nil { - // TODO this seems to be an internal inconsistency error, ignore so we can still show the rest - span.AddEvent(fmt.Sprintf("could not get trace for traceID %v", traceID)) + jobs <- job{ + ctx: ctx, + traceID: traceID, + } + } + close(jobs) + workersDone.Wait() + + var failedTraces []jobResult + // Collecting results + for i := 0; i < len(resp.TraceIDs); i++ { + result := <-results + if result.err != nil { + //// TODO this seems to be an internal inconsistency error, ignore so we can still show the rest + b.logger.Info("failed to get a trace", zap.Error(result.err), zap.String("traceid", result.traceID.String())) + span.AddEvent(fmt.Sprintf("could not get trace for traceID %v", result.traceID)) - span.RecordError(err) + span.RecordError(result.err) - continue + failedTraces = append(failedTraces, result) + } else { + jaegerTraces = append(jaegerTraces, result.trace) } - - jaegerTraces = append(jaegerTraces, trace) + } + close(results) + if len(failedTraces) > 0 { + b.logger.Info("FindTraces: failed to find traces, getTrace failed", zap.Int32("limit", req.Query.NumTraces), zap.Int("failed", len(failedTraces))) } span.AddEvent(fmt.Sprintf("Returning %d traces", len(jaegerTraces))) diff --git a/cmd/tempo-serverless/Makefile b/cmd/tempo-serverless/Makefile index 86b419e09c0..78ddcd78c54 100644 --- a/cmd/tempo-serverless/Makefile +++ b/cmd/tempo-serverless/Makefile @@ -33,13 +33,14 @@ build-docker-lambda-test: $(IN_LAMBDA) CGO_ENABLED=0 go build -o ./lambda $(IN_LAMBDA) docker build -f ./Dockerfile -t tempo-serverless-lambda . -# lambda zips expect a compiled executable in root. the filename "main" is important -# as that should the handler config option in aws +# Lambda zips expect a compiled executable in the root. The filename "bootstrap" is important here.
+# The new AWS Lambda runtime expects an executable with the name "bootstrap" to be provided, the "handler" configuration is ignored when using the new runtime. +# See https://aws.amazon.com/blogs/compute/migrating-aws-lambda-functions-from-the-go1-x-runtime-to-the-custom-runtime-on-amazon-linux-2/ for more info. .PHONY: build-lambda-zip build-lambda-zip: - $(IN_LAMBDA) CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o main - $(IN_LAMBDA) zip tempo-serverless-$(VERSION).zip main - $(IN_LAMBDA) rm main + $(IN_LAMBDA) CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o bootstrap + $(IN_LAMBDA) zip tempo-serverless-$(VERSION).zip bootstrap + $(IN_LAMBDA) rm bootstrap .PHONY: test test: diff --git a/cmd/tempo-serverless/cloud-run/go.mod b/cmd/tempo-serverless/cloud-run/go.mod index f8bd78a3d37..d0fed2b6eb2 100644 --- a/cmd/tempo-serverless/cloud-run/go.mod +++ b/cmd/tempo-serverless/cloud-run/go.mod @@ -20,7 +20,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect - github.com/andybalholm/brotli v1.1.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect github.com/apache/thrift v0.20.0 // indirect github.com/aws/aws-sdk-go v1.55.5 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -59,11 +59,11 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.6 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/magiconair/properties v1.8.7 // indirect - github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/miekg/dns v1.1.61 // indirect github.com/minio/md5-simd 
v1.1.2 // indirect github.com/minio/minio-go/v7 v7.0.70 // indirect @@ -78,7 +78,7 @@ require ( github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/parquet-go/parquet-go v0.23.0 // indirect + github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe // indirect github.com/pelletier/go-toml/v2 v2.1.0 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pires/go-proxyproto v0.7.0 // indirect @@ -94,7 +94,6 @@ require ( github.com/rs/xid v1.5.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect - github.com/segmentio/encoding v0.4.0 // indirect github.com/sercand/kuberesolver/v5 v5.1.1 // indirect github.com/sony/gobreaker v0.4.1 // indirect github.com/sourcegraph/conc v0.3.0 // indirect @@ -114,9 +113,9 @@ require ( go.opentelemetry.io/collector/semconv v0.105.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 // indirect - go.opentelemetry.io/otel v1.30.0 // indirect - go.opentelemetry.io/otel/metric v1.30.0 // indirect - go.opentelemetry.io/otel/trace v1.30.0 // indirect + go.opentelemetry.io/otel v1.31.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect + go.opentelemetry.io/otel/trace v1.31.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect @@ -126,7 +125,7 @@ require ( golang.org/x/net v0.27.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.22.0 // indirect + golang.org/x/sys v0.26.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.23.0 // indirect diff --git 
a/cmd/tempo-serverless/cloud-run/go.sum b/cmd/tempo-serverless/cloud-run/go.sum index 06d0d648896..22c145f16b0 100644 --- a/cmd/tempo-serverless/cloud-run/go.sum +++ b/cmd/tempo-serverless/cloud-run/go.sum @@ -35,8 +35,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGn github.com/alicebob/miniredis v2.5.0+incompatible h1:yBHoLpsyjupjz3NL3MhKMVkR41j82Yjf3KFv7ApYzUI= github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M= github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88= -github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= @@ -170,8 +170,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.1/go.mod 
h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -188,8 +188,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs= github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= @@ -230,8 +230,8 @@ github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NH github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/parquet-go/parquet-go v0.23.0 h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk= -github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod 
h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4= github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= @@ -269,8 +269,6 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= -github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8= -github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI= github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY= github.com/sercand/kuberesolver/v5 v5.1.1/go.mod h1:Fs1KbKhVRnB2aDWN12NjKCB+RgYMWZJ294T3BtmVCpQ= github.com/sony/gobreaker v0.4.1 h1:oMnRNZXX5j85zso6xCPRNPtmAycat+WcoKbklScLDgQ= @@ -313,6 +311,8 @@ github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA= github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/gopher-lua v0.0.0-20220504180219-658193537a64 h1:5mLPGnFdSsevFRFc9q3yYbBkB6tsm4aCwwQV/j1JQAQ= @@ -327,14 
+327,14 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.5 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0/go.mod h1:BMsdeOxN04K0L5FNUBfjFdvwWGNe/rkmSwH4Aelu/X0= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 h1:ZIg3ZT/aQ7AfKqdwp7ECpOK6vHqquXXuyTjIO8ZdmPs= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0/go.mod h1:DQAwmETtZV00skUwgD6+0U89g80NKsJE3DCKeLLPQMI= -go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= -go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= -go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= -go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= -go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= -go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= -go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= -go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk= +go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= 
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= @@ -388,8 +388,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= diff --git a/cmd/tempo-serverless/lambda/go.mod b/cmd/tempo-serverless/lambda/go.mod index 2463d03d31c..d488de58e40 100644 --- a/cmd/tempo-serverless/lambda/go.mod +++ b/cmd/tempo-serverless/lambda/go.mod @@ -22,7 +22,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect - github.com/andybalholm/brotli v1.1.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect github.com/apache/thrift v0.20.0 // indirect github.com/aws/aws-sdk-go v1.55.5 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -62,11 +62,11 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.9 // indirect 
+ github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.6 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/magiconair/properties v1.8.7 // indirect - github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/miekg/dns v1.1.61 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/minio/minio-go/v7 v7.0.70 // indirect @@ -81,7 +81,7 @@ require ( github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/parquet-go/parquet-go v0.23.0 // indirect + github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe // indirect github.com/pelletier/go-toml/v2 v2.1.0 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pires/go-proxyproto v0.7.0 // indirect @@ -98,7 +98,6 @@ require ( github.com/rs/xid v1.5.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect - github.com/segmentio/encoding v0.4.0 // indirect github.com/sercand/kuberesolver/v5 v5.1.1 // indirect github.com/sony/gobreaker v0.4.1 // indirect github.com/sourcegraph/conc v0.3.0 // indirect @@ -118,9 +117,9 @@ require ( go.opentelemetry.io/collector/semconv v0.105.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 // indirect - go.opentelemetry.io/otel v1.30.0 // indirect - go.opentelemetry.io/otel/metric v1.30.0 // indirect - go.opentelemetry.io/otel/trace v1.30.0 // indirect + go.opentelemetry.io/otel v1.31.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect + go.opentelemetry.io/otel/trace v1.31.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap 
v1.27.0 // indirect @@ -130,7 +129,7 @@ require ( golang.org/x/net v0.27.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.22.0 // indirect + golang.org/x/sys v0.26.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.23.0 // indirect diff --git a/cmd/tempo-serverless/lambda/go.sum b/cmd/tempo-serverless/lambda/go.sum index 80a339939e6..259a30d11d9 100644 --- a/cmd/tempo-serverless/lambda/go.sum +++ b/cmd/tempo-serverless/lambda/go.sum @@ -35,8 +35,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGn github.com/alicebob/miniredis v2.5.0+incompatible h1:yBHoLpsyjupjz3NL3MhKMVkR41j82Yjf3KFv7ApYzUI= github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M= github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88= -github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= github.com/aws/aws-lambda-go v1.28.0 h1:fZiik1PZqW2IyAN4rj+Y0UBaO1IDFlsNo9Zz/XnArK4= @@ -174,8 +174,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 
h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -192,8 +192,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs= github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= @@ -234,8 +234,8 @@ github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NH github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/parquet-go/parquet-go v0.23.0 
h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk= -github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4= github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= @@ -274,8 +274,6 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= -github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8= -github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI= github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY= github.com/sercand/kuberesolver/v5 v5.1.1/go.mod h1:Fs1KbKhVRnB2aDWN12NjKCB+RgYMWZJ294T3BtmVCpQ= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= @@ -321,6 +319,8 @@ github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA= github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= 
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/gopher-lua v0.0.0-20220504180219-658193537a64 h1:5mLPGnFdSsevFRFc9q3yYbBkB6tsm4aCwwQV/j1JQAQ= @@ -335,14 +335,14 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.5 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0/go.mod h1:BMsdeOxN04K0L5FNUBfjFdvwWGNe/rkmSwH4Aelu/X0= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 h1:ZIg3ZT/aQ7AfKqdwp7ECpOK6vHqquXXuyTjIO8ZdmPs= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0/go.mod h1:DQAwmETtZV00skUwgD6+0U89g80NKsJE3DCKeLLPQMI= -go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= -go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= -go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= -go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= -go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= -go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= -go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= -go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/sdk v1.31.0 
h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk= +go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= @@ -396,8 +396,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= diff --git a/cmd/tempo-vulture/mocks.go b/cmd/tempo-vulture/mocks.go index 3ffda22d42a..955d771d27b 100644 --- a/cmd/tempo-vulture/mocks.go +++ b/cmd/tempo-vulture/mocks.go @@ -148,6 +148,11 @@ func (m *MockHTTPClient) SearchTraceQL(query string) (*tempopb.SearchResponse, e panic("unimplemented") } +//nolint:all +func (m *MockHTTPClient) SearchTraceQLWithRangeAndLimit(query string, start int64, end int64, limit int64, spss int64) (*tempopb.SearchResponse, error) { + panic("unimplemented") +} + //nolint:all 
func (m *MockHTTPClient) SearchTraceQLWithRange(query string, start int64, end int64) (*tempopb.SearchResponse, error) { if m.err != nil { diff --git a/cmd/tempo/app/modules.go b/cmd/tempo/app/modules.go index d81d3ec69a6..2780ed31364 100644 --- a/cmd/tempo/app/modules.go +++ b/cmd/tempo/app/modules.go @@ -243,6 +243,10 @@ func (t *App) initDistributor() (services.Service, error) { t.Server.HTTPRouter().Handle("/distributor/ring", distributor.DistributorRing) } + if usageHandler := distributor.UsageTrackerHandler(); usageHandler != nil { + t.Server.HTTPRouter().Handle("/usage_metrics", usageHandler) + } + return t.distributor, nil } diff --git a/docs/sources/tempo/api_docs/_index.md b/docs/sources/tempo/api_docs/_index.md index abeb2e563a7..346e85cf3d2 100644 --- a/docs/sources/tempo/api_docs/_index.md +++ b/docs/sources/tempo/api_docs/_index.md @@ -31,13 +31,14 @@ For externally supported GRPC API, [see below](#tempo-grpc-api). | [Search tag names V2](#search-tags-v2) | Query-frontend | HTTP | `GET /api/v2/search/tags` | | [Search tag values](#search-tag-values) | Query-frontend | HTTP | `GET /api/search/tag//values` | | [Search tag values V2](#search-tag-values-v2) | Query-frontend | HTTP | `GET /api/v2/search/tag//values` | -| [TraceQL Metrics](#traceql-metrics) | Query-frontend | HTTP | `GET /api/metrics/query_range` | +| [TraceQL Metrics](#traceql-metrics) | Query-frontend | HTTP | `GET /api/metrics/query_range` | | [TraceQL Metrics (instant)](#instant) | Query-frontend | HTTP | `GET /api/metrics/query` | | [Query Echo Endpoint](#query-echo-endpoint) | Query-frontend | HTTP | `GET /api/echo` | | [Overrides API](#overrides-api) | Query-frontend | HTTP | `GET,POST,PATCH,DELETE /api/overrides` | | Memberlist | Distributor, Ingester, Querier, Compactor | HTTP | `GET /memberlist` | | [Flush](#flush) | Ingester | HTTP | `GET,POST /flush` | | [Shutdown](#shutdown) | Ingester | HTTP | `GET,POST /shutdown` | +| [Usage Metrics](#usage-metrics) | Distributor | HTTP | 
`GET /usage_metrics` | | [Distributor ring status](#distributor-ring-status) (*) | Distributor | HTTP | `GET /distributor/ring` | | [Ingesters ring status](#ingesters-ring-status) | Distributor, Querier | HTTP | `GET /ingester/ring` | | [Metrics-generator ring status](#metrics-generator-ring-status) (*) | Distributor | HTTP | `GET /metrics-generator/ring` | @@ -311,8 +312,9 @@ $ curl -G -s http://localhost:3200/api/search --data-urlencode 'tags=service.nam Ingester configuration `complete_block_timeout` affects how long tags are available for search. -This endpoint retrieves all discovered tag names that can be used in search. The endpoint is available in the query frontend service in -a microservices deployment, or the Tempo endpoint in a monolithic mode deployment. The tags endpoint takes a scope that controls the kinds +This endpoint retrieves all discovered tag names that can be used in search. +The endpoint is available in the query frontend service in a microservices deployment, or the Tempo endpoint in a monolithic mode deployment. +The tags endpoint takes a scope that controls the kinds of tags or attributes returned. If nothing is provided, the endpoint returns all resource and span tags. 
``` @@ -343,6 +345,9 @@ $ curl -G -s http://localhost:3200/api/search/tags?scope=span | jq "starter", "version" ] + "metrics": { + "inspectedBytes": "630188" + } } ``` @@ -391,11 +396,9 @@ $ curl -G -s http://localhost:3200/api/v2/search/tags | jq { "scopes": [ { - "name": "span", + "name": "link", "tags": [ - "article.count", - "http.flavor", - "http.method", + "link-type" ] }, { @@ -405,16 +408,70 @@ $ curl -G -s http://localhost:3200/api/v2/search/tags | jq "service.name" ] }, + { + "name": "span", + "tags": [ + "article.count", + "http.flavor", + "http.method", + "http.request.header.accept", + "http.request_content_length", + "http.response.header.content-type", + "http.response_content_length", + "http.scheme", + "http.status_code", + "http.target", + "http.url", + "net.host.name", + "net.host.port", + "net.peer.name", + "net.peer.port", + "net.sock.family", + "net.sock.host.addr", + "net.sock.peer.addr", + "net.transport", + "numbers", + "one" + ] + }, { "name": "intrinsic", "tags": [ "duration", + "event:name", + "event:timeSinceStart", + "instrumentation:name", + "instrumentation:version", "kind", "name", - "status" + "rootName", + "rootServiceName", + "span:duration", + "span:kind", + "span:name", + "span:status", + "span:statusMessage", + "status", + "statusMessage", + "trace:duration", + "trace:rootName", + "trace:rootService", + "traceDuration" + ] + }, + { + "name": "event", + "tags": [ + "exception.escape", + "exception.message", + "exception.stacktrace", + "exception.type", ] } - ] + ], + "metrics": { + "inspectedBytes": "377046" + } } ``` @@ -440,13 +497,16 @@ This query returns all discovered values for the tag `service.name`. 
$ curl -G -s http://localhost:3200/api/search/tag/service.name/values | jq { "tagValues": [ - "adservice", - "cartservice", - "checkoutservice", - "frontend", - "productcatalogservice", - "recommendationservice" - ] + "article-service", + "auth-service", + "billing-service", + "cart-service", + "postgres", + "shop-backend" + ], + "metrics": { + "inspectedBytes": "431380" + } } ``` @@ -468,30 +528,37 @@ See [TraceQL]({{< relref "../traceql" >}}) documentation for more information. This example queries Tempo using curl and returns all discovered values for the tag `service.name`. ```bash -$ curl http://localhost:3200/api/v2/search/tag/.service.name/values | jq . +$ curl -G -s http://localhost:3200/api/v2/search/tag/.service.name/values | jq { "tagValues": [ { "type": "string", - "value": "customer" + "value": "article-service" + }, + { + "type": "string", + "value": "postgres" }, { "type": "string", - "value": "mysql" + "value": "cart-service" }, { "type": "string", - "value": "driver" + "value": "billing-service" }, { "type": "string", - "value": "frontend" + "value": "shop-backend" }, { "type": "string", - "value": "redis" + "value": "auth-service" } - ] + ], + "metrics": { + "inspectedBytes": "502756" + } } ``` This endpoint can also receive `start` and `end` optional parameters. These parameters define the time range from which the tags are fetched @@ -518,7 +585,9 @@ If a particular service name (for example, `shopping-cart`) is only present on s ### TraceQL Metrics -The TraceQL Metrics API returns Prometheus-like time-series for a given metrics query. Metrics queries are those using metrics functions like `rate()` and `quantile_over_time()`. See the [documentation]({{< relref "../traceql/metrics-queries" >}}) for the complete list. +The TraceQL Metrics API returns Prometheus-like time-series for a given metrics query. +Metrics queries are those using metrics functions like `rate()` and `quantile_over_time()`. 
+Refer to the [TraceQL metrics documentation](https://grafana.com/docs/tempo//traceql/metrics-queries/) for more information. Parameters: @@ -529,21 +598,23 @@ Parameters: - `end = (unix epoch seconds | unix epoch nanoseconds | RFC3339 string)` Optional. Along with `start` define the time range. Providing both `start` and `end` includes blocks for the specified time range only. - `since = (duration string)` - Optional. Can be used instead of `start` and `end` to define the time range in relative values. For example `since=15m` will query the last 15 minutes. Default is last 1 hour. + Optional. Can be used instead of `start` and `end` to define the time range in relative values. For example, `since=15m` queries the last 15 minutes. Default is the last 1 hour. - `step = (duration string)` - Optional. Defines the granularity of the returned time-series. For example `step=15s` will return a data point every 15s within the time range. If not specified then the default behavior will choose a dynamic step based on the time range. + Optional. Defines the granularity of the returned time-series. For example, `step=15s` returns a data point every 15s within the time range. If not specified, then the default behavior chooses a dynamic step based on the time range. +- `exemplars = (integer)` + Optional. Defines the maximum number of exemplars for the query. The value is trimmed to `max_exemplars` if it exceeds that limit. The API is available in the query frontend service in a microservices deployment, or the Tempo endpoint in a monolithic mode deployment. -For example the following request computes the rate of spans received for `myservice` over the last three hours, at 1 minute intervals. +For example, the following request computes the rate of spans received for `myservice` over the last three hours, at 1 minute intervals. {{< admonition type="note" >}} Actual API parameters must be url-encoded. This example is left unencoded for readability. 
-{{% /admonition %}} +{{< /admonition >}} ``` -GET /api/metrics/query_range?q={resource.service.name="myservice"}|rate()&since=3h&step=1m +GET /api/metrics/query_range?q={resource.service.name="myservice"} | min_over_time() with(exemplars=true) &since=3h&step=1m&exemplars=100 ``` #### Instant @@ -619,6 +690,30 @@ ingester service. This is usually used at the time of scaling down a cluster. {{% /admonition %}} +### Usage metrics + +{{< admonition type="note" >}} +This endpoint is only available when one or more usage trackers are enabled in [the distributor]({{< relref "../configuration#distributor" >}}). +{{< /admonition >}} + +``` +GET /usage_metrics +``` + +This special metrics scrape endpoint provides per-tenant metrics on ingested data. Per-tenant grouping rules are configured in [the per-tenant overrides]({{< relref "../configuration#overrides" >}}). + +Example: +``` +curl http://localhost:3200/usage_metrics +# HELP tempo_usage_tracker_bytes_received_total bytes total received with these attributes +# TYPE tempo_usage_tracker_bytes_received_total counter +tempo_usage_tracker_bytes_received_total{service="auth-service",tenant="single-tenant",tracker="cost-attribution"} 96563 +tempo_usage_tracker_bytes_received_total{service="cache",tenant="single-tenant",tracker="cost-attribution"} 81904 +tempo_usage_tracker_bytes_received_total{service="gateway",tenant="single-tenant",tracker="cost-attribution"} 164751 +tempo_usage_tracker_bytes_received_total{service="identity-service",tenant="single-tenant",tracker="cost-attribution"} 85974 +tempo_usage_tracker_bytes_received_total{service="service-A",tenant="single-tenant",tracker="cost-attribution"} 92799 +``` + ### Distributor ring status {{< admonition type="note" >}} @@ -763,6 +858,6 @@ service StreamingQuerier { rpc SearchTagsV2(SearchTagsRequest) returns (stream SearchTagsV2Response) {} rpc SearchTagValues(SearchTagValuesRequest) returns (stream SearchTagValuesResponse) {} rpc SearchTagValuesV2(SearchTagValuesRequest) 
returns (stream SearchTagValuesV2Response) {} - rpc MetricsQueryRange(QueryRangeRequest) returns (stream QueryRangeResponse) {} + rpc MetricsQueryRange(QueryRangeRequest) returns (stream QueryRangeResponse) {} } ``` diff --git a/docs/sources/tempo/api_docs/metrics-summary.md b/docs/sources/tempo/api_docs/metrics-summary.md index 9f0933b5a85..2d27ed9a6e6 100644 --- a/docs/sources/tempo/api_docs/metrics-summary.md +++ b/docs/sources/tempo/api_docs/metrics-summary.md @@ -10,7 +10,8 @@ weight: 600 # Metrics summary API {{< admonition type="warning" >}} -The Metrics summary API is an [experimental feature](/docs/release-life-cycle) that is disabled by default. To enable it, adjust your configuration as suggested below. +The metrics summary API is deprecated as of Tempo 2.7. Features powered by the metrics summary API, like the Aggregate by table, are also deprecated in Grafana Cloud and Grafana 11.3 and later. +It will be removed in a future release. {{% /admonition %}} This document explains how to use the metrics summary API in Tempo. @@ -18,9 +19,9 @@ This API returns RED metrics (span count, erroring span count, and latency infor {{< youtube id="g97CjKOZqT4" >}} -## Configuration +## Activate metrics summary -To enable the experimental metrics summary API, you must turn on the local blocks processor in the metrics generator. +To enable the (deprecated) metrics summary API, you must turn on the local blocks processor in the metrics generator. Be aware that the generator uses considerably more resources, including disk space, if it's enabled: ```yaml @@ -30,6 +31,9 @@ overrides: processors: [..., 'local-blocks'] ``` +In Grafana and Grafana Cloud, the Metrics summary API is disabled by default. +To enable it in Grafana Cloud, contact Grafana Support. 
+ ## Request To make a request to this API, use the following endpoint on the query-frontend: diff --git a/docs/sources/tempo/api_docs/pushing-spans-with-http.md b/docs/sources/tempo/api_docs/pushing-spans-with-http.md index 4f52c1d0864..34ee953cb23 100644 --- a/docs/sources/tempo/api_docs/pushing-spans-with-http.md +++ b/docs/sources/tempo/api_docs/pushing-spans-with-http.md @@ -72,7 +72,7 @@ curl -X POST -H 'Content-Type: application/json' http://localhost:4318/v1/traces }' ``` -Note that the `startTimeUnixNano` field is in nanoseconds and can be obtained by any tool that provides the epoch date in nanoseconds (for example, under Linux, `date +%s%8N`). The `endTimeUnixNano` field is also in nanoseconds, where 100000000 nanoseconds is 100 milliseconds. +Note that the `startTimeUnixNano` field is in nanoseconds and can be obtained by any tool that provides the epoch date in nanoseconds (for example, under Linux, `date +%s%N`). The `endTimeUnixNano` field is also in nanoseconds, where 100000000 nanoseconds is 100 milliseconds. 1. Copy and paste the curl command into a text editor. diff --git a/docs/sources/tempo/configuration/_index.md b/docs/sources/tempo/configuration/_index.md index 6a35e9f4744..3e48575f5e6 100644 --- a/docs/sources/tempo/configuration/_index.md +++ b/docs/sources/tempo/configuration/_index.md @@ -228,6 +228,19 @@ distributor: # defaults to 0 which means that by default ResourceExhausted is not retried. Set this to a duration such as `1s` to # instruct the client how to retry. [retry_after_on_resource_exhausted: | default = '0' ] + + # Optional. + # Configures usage trackers in the distributor which expose metrics of ingested traffic grouped by configurable + # attributes exposed on /usage_metrics. + usage: + cost_attribution: + # Enables the "cost-attribution" usage tracker. Per-tenant attributes are configured in overrides. + [enabled: | default = false] + # Maximum number of series per tenant. 
+ [max_cardinality: | default = 10000] + # Interval after which a series is considered stale and will be deleted from the registry. + # Once a metrics series is deleted, it won't be emitted anymore, keeping active series low. + [stale_duration: | default = 15m0s] ``` ## Ingester @@ -287,6 +300,10 @@ For more information on the metrics-generator, refer to the [Metrics-generator d Metrics-generator processors are disabled by default. To enable it for a specific tenant, set `metrics_generator.processors` in the [overrides](#overrides) section. +{{< admonition type="note" >}} +If you want to enable metrics-generator for your Grafana Cloud account, refer to the [Metrics-generator in Grafana Cloud](https://grafana.com/docs/grafana-cloud/send-data/traces/metrics-generator/) documentation. +{{< /admonition >}} + You can limit spans with end times that occur within a configured duration to be considered in metrics generation using `metrics_ingestion_time_range_slack`. In Grafana Cloud, this value defaults to 30 seconds so all spans sent to the metrics-generation more than 30 seconds in the past are discarded or rejected. @@ -366,6 +383,8 @@ metrics_generator: [peer_attributes: | default = ["peer.service", "db.name", "db.system"] ] # Attribute Key to multiply span metrics + # Note that the attribute name is searched for in both + # resource and span level attributes [span_multiplier_key: | default = ""] # Enables additional labels for services and virtual nodes. @@ -409,6 +428,8 @@ metrics_generator: [enable_target_info: | default = false] # Attribute Key to multiply span metrics + # Note that the attribute name is searched for in both + # resource and span level attributes [span_multiplier_key: | default = ""] # List of policies that will be applied to spans for inclusion or exclusion. @@ -464,7 +485,7 @@ metrics_generator: [collection_interval: | default = 15s] # Interval after which a series is considered stale and will be deleted from the registry. 
- # Once a metrics series is deleted it won't be emitted anymore, keeping active series low. + # Once a metrics series is deleted, it won't be emitted anymore, keeping active series low. [stale_duration: | default = 15m] # A list of labels that will be added to all generated metrics. @@ -610,7 +631,7 @@ query_frontend: # If set to a non-zero value, it's value will be used to decide if query is within SLO or not. # Query is within SLO if it returned 200 within duration_slo seconds OR processed throughput_slo bytes/s data. - # NOTE: `duration_slo` and `throughput_bytes_slo` both must be configured for it to work + # NOTE: Requires `duration_slo` AND `throughput_bytes_slo` to be configured. [duration_slo: | default = 0s ] # If set to a non-zero value, it's value will be used to decide if query is within SLO or not. @@ -619,6 +640,17 @@ query_frontend: # The number of shards to break ingester queries into. [ingester_shards]: | default = 1] + + # SLO configuration for Metadata (tags and tag values) endpoints. + metadata_slo: + # If set to a non-zero value, its value will be used to decide if metadata query is within SLO or not. + # Query is within SLO if it returned 200 within duration_slo seconds OR processed throughput_slo bytes/s data. + # NOTE: Requires `duration_slo` AND `throughput_bytes_slo` to be configured. + [duration_slo: | default = 0s ] + + # If set to a non-zero value, its value will be used to decide if metadata query is within SLO or not. + # Query is within SLO if it returned 200 within duration_slo seconds OR processed throughput_slo bytes/s data. + [throughput_bytes_slo: | default = 0 ] # Trace by ID lookup configuration trace_by_id: @@ -646,6 +678,9 @@ query_frontend: # 0 disables this limit. [max_duration: | default = 3h ] + # Maximum number of exemplars per range query. Limited to 100. + [max_exemplars: | default = 100 ] + # query_backend_after controls where the query-frontend searches for traces. 
# Time ranges older than query_backend_after will be searched in the backend/object storage only. # Time ranges between query_backend_after and now will be queried from the metrics-generators. @@ -662,6 +697,7 @@ query_frontend: # If set to a non-zero value, it's value will be used to decide if query is within SLO or not. # Query is within SLO if it returned 200 within duration_slo seconds OR processed throughput_slo bytes/s data. [throughput_bytes_slo: | default = 0 ] + ``` ## Querier @@ -1700,6 +1736,13 @@ overrides: scope: # scope of the attribute. options: resource, span ] + # Cost attribution usage tracker configuration + cost_attribution: + # List of attributes to group ingested data by. Map value is optional. Can be used to rename and + # combine attributes. + dimensions: + + # Tenant-specific overrides settings configuration file. The empty string (default # value) disables using an overrides file. [per_tenant_override_config: | default = ""] diff --git a/docs/sources/tempo/configuration/grafana-alloy/service-graphs.md b/docs/sources/tempo/configuration/grafana-alloy/service-graphs.md index 00677ca4553..4f4de7aaf98 100644 --- a/docs/sources/tempo/configuration/grafana-alloy/service-graphs.md +++ b/docs/sources/tempo/configuration/grafana-alloy/service-graphs.md @@ -18,7 +18,7 @@ This is more efficient and recommended for larger installations. For a deep look into service graphs, visit [this section](https://grafana.com/docs/tempo//metrics-generator/service_graphs). Service graphs are also used in the application performance management dashboard. -For more information, refer to the [service graph view documentation](https://grafana.com/docs/tempo//metrics-generator/service_graph-view). +For more information, refer to the [service graph view documentation](https://grafana.com/docs/tempo//metrics-generator/service-graph-view). 
## Before you begin diff --git a/docs/sources/tempo/configuration/grafana-alloy/span-metrics.md b/docs/sources/tempo/configuration/grafana-alloy/span-metrics.md index d09f1ef4274..25a2a482514 100644 --- a/docs/sources/tempo/configuration/grafana-alloy/span-metrics.md +++ b/docs/sources/tempo/configuration/grafana-alloy/span-metrics.md @@ -83,7 +83,7 @@ otelcol.exporter.otlp "default" { ``` Span metrics are also used in the service graph view. -For more information, refer to the [service graph view](https://grafana.com/docs/tempo//metrics-generator/service_graph-view). +For more information, refer to the [service graph view](https://grafana.com/docs/tempo//metrics-generator/service-graph-view). To see all the available configuration options, refer to the [component reference](https://grafana.com/docs/alloy/latest/reference/components/otelcol.connector.spanmetrics/). diff --git a/docs/sources/tempo/configuration/hosted-storage/s3.md b/docs/sources/tempo/configuration/hosted-storage/s3.md index e9a0e712088..2dc3730e236 100644 --- a/docs/sources/tempo/configuration/hosted-storage/s3.md +++ b/docs/sources/tempo/configuration/hosted-storage/s3.md @@ -18,6 +18,7 @@ The following authentication methods are supported: - MinIO client credentials [configuration file](https://github.com/minio/mc/blob/master/docs/minio-client-configuration-files.md) - AWS IAM ([IRSA via WebIdentity](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html), - AWS [EC2 instance role](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html)) +- AWS [EKS Pod Identity](https://docs.aws.amazon.com/eks/latest/userguide/pod-identities.html) The following IAM policy shows minimal permissions required by Tempo, where the bucket has already been created. 
diff --git a/docs/sources/tempo/configuration/manifest.md b/docs/sources/tempo/configuration/manifest.md index 07f78ee1cab..ec2f36c6965 100644 --- a/docs/sources/tempo/configuration/manifest.md +++ b/docs/sources/tempo/configuration/manifest.md @@ -18,7 +18,7 @@ go run ./cmd/tempo --storage.trace.backend=local --storage.trace.local.path=/var ## Complete configuration {{< admonition type="note" >}} -This manifest was generated on 2023-11-13. +This manifest was generated on 2024-10-21. {{% /admonition %}} ```yaml @@ -33,6 +33,7 @@ server: grpc_listen_address: "" grpc_listen_port: 9095 grpc_listen_conn_limit: 0 + proxy_protocol_enabled: false tls_cipher_suites: "" tls_min_version: "" http_tls_config: @@ -70,6 +71,8 @@ server: grpc_server_min_time_between_pings: 10s grpc_server_ping_without_stream_allowed: true grpc_server_num_workers: 0 + grpc_server_stats_tracking_enabled: true + grpc_server_recv_buffer_pools_enabled: false log_format: logfmt log_level: info log_source_ips_enabled: false @@ -89,6 +92,7 @@ internal_server: grpc_listen_address: "" grpc_listen_port: 0 grpc_listen_conn_limit: 0 + proxy_protocol_enabled: false tls_cipher_suites: "" tls_min_version: "" http_tls_config: @@ -126,6 +130,8 @@ internal_server: grpc_server_min_time_between_pings: 0s grpc_server_ping_without_stream_allowed: false grpc_server_num_workers: 0 + grpc_server_stats_tracking_enabled: false + grpc_server_recv_buffer_pools_enabled: false log_format: logfmt log_level: info log_source_ips_enabled: false @@ -180,6 +186,11 @@ distributor: receivers: {} override_ring_key: distributor forwarders: [] + usage: + cost_attribution: + enabled: false + max_cardinality: 10000 + stale_duration: 15m0s extend_writes: true retry_after_on_resource_exhausted: 0s ingester_client: @@ -314,7 +325,9 @@ query_frontend: max_duration: 3h0m0s query_backend_after: 30m0s interval: 5m0s + max_exemplars: 100 multi_tenant_queries_enabled: true + response_consumers: 10 compactor: ring: kvstore: @@ -582,7 +595,7 @@ 
metrics_generator: path: "" v2_encoding: none search_encoding: none - ingestion_time_range_slack: 0s + ingestion_time_range_slack: 2m0s version: vParquet4 metrics_ingestion_time_range_slack: 30s query_timeout: 30s @@ -620,11 +633,13 @@ storage: offset_index: false blocklist_poll: 5m0s blocklist_poll_concurrency: 50 + blocklist_poll_tenant_concurrency: 0 blocklist_poll_fallback: true blocklist_poll_tenant_index_builders: 2 blocklist_poll_stale_tenant_index: 0s blocklist_poll_jitter_ms: 0 blocklist_poll_tolerate_consecutive_errors: 1 + blocklist_poll_tolerate_tenant_failures: 1 empty_tenant_deletion_enabled: false empty_tenant_deletion_age: 0s backend: local @@ -699,6 +714,9 @@ overrides: max_traces_per_user: 10000 read: max_bytes_per_tag_values_query: 5000000 + metrics_generator: + generate_native_histograms: classic + ingestion_time_range_slack: 0s global: max_bytes_per_trace: 5000000 per_tenant_override_config: "" @@ -788,6 +806,7 @@ memberlist: rejoin_interval: 0s left_ingesters_timeout: 5m0s leave_timeout: 20s + broadcast_timeout_for_local_updates_on_shutdown: 10s message_history_buffer_bytes: 0 bind_addr: [] bind_port: 7946 diff --git a/docs/sources/tempo/metrics-generator/_index.md b/docs/sources/tempo/metrics-generator/_index.md index 9585fd3510e..9c8a7436cc2 100644 --- a/docs/sources/tempo/metrics-generator/_index.md +++ b/docs/sources/tempo/metrics-generator/_index.md @@ -10,13 +10,9 @@ weight: 500 # Metrics-generator Metrics-generator is an optional Tempo component that derives metrics from ingested traces. -If present, the distributor will write received spans to both the ingester and the metrics-generator. +If present, the distributor writes received spans to both the ingester and the metrics-generator. The metrics-generator processes spans and writes metrics to a Prometheus data source using the Prometheus remote write protocol. 
-{{< admonition type="note" >}} -Enabling metrics generation and remote writing them to Grafana Cloud Metrics produces extra active series that could impact your billing. For more information on billing, refer to [Billing and usage](/docs/grafana-cloud/billing-and-usage/). -{{% /admonition %}} - ## Architecture Metrics-generator leverages the data available in the ingest path in Tempo to provide additional value by generating metrics from traces. @@ -76,3 +72,10 @@ high-resolution data. Users must [update the receiving endpoint](https://grafana histograms, and [update histogram queries](https://grafana.com/docs/mimir//visualize/native-histograms/) in their dashboards. To learn more about the configuration, refer to the [Metrics-generator]({{< relref "../configuration#metrics-generator" >}}) section of the Tempo Configuration documentation. + +## Use metrics-generator in Grafana Cloud + +If you want to enable metrics-generator for your Grafana Cloud account, refer to the [Metrics-generator in Grafana Cloud](https://grafana.com/docs/grafana-cloud/send-data/traces/metrics-generator/) documentation. + +Enabling metrics generation and remote writing them to Grafana Cloud Metrics produces extra active series that could impact your billing. +For more information on billing, refer to [Billing and usage](/docs/grafana-cloud/billing-and-usage/). diff --git a/docs/sources/tempo/metrics-generator/service_graphs/_index.md b/docs/sources/tempo/metrics-generator/service_graphs/_index.md index 490e3a777ac..4003d2ebe53 100644 --- a/docs/sources/tempo/metrics-generator/service_graphs/_index.md +++ b/docs/sources/tempo/metrics-generator/service_graphs/_index.md @@ -8,9 +8,6 @@ weight: 300 # Service graphs -{{< docs/alias from="/docs/tempo/latest/server_side_metrics/service_graphs/" to="/docs/tempo/latest/metrics-generator/service_graphs/" >}} - - A service graph is a visual representation of the interrelationships between various services. 
Service graphs help you to understand the structure of a distributed system, and the connections and dependencies between its components: diff --git a/docs/sources/tempo/metrics-generator/span_metrics.md b/docs/sources/tempo/metrics-generator/span_metrics.md index dd89401f6d2..68430e3feca 100644 --- a/docs/sources/tempo/metrics-generator/span_metrics.md +++ b/docs/sources/tempo/metrics-generator/span_metrics.md @@ -1,7 +1,6 @@ --- aliases: - - /docs/tempo/latest/server_side_metrics/span_metrics/ - - /docs/tempo/latest/metrics-generator/span_metrics/ + - ../server_side_metrics/span_metrics/ # /docs/tempo/latest/server_side_metrics/span_metrics/ title: Span metrics description: The span metrics processor generates metrics from ingested tracing data, including request, error, and duration (RED) metrics. weight: 200 @@ -20,10 +19,6 @@ Span metrics are of particular interest if your system is not monitored with met but it has distributed tracing implemented. You get out-of-the-box metrics from your tracing pipeline. -{{< admonition type="note" >}} -Metrics generation is disabled by default. Contact Grafana Support to enable metrics generation in your organization. -{{% /admonition %}} - Even if you already have metrics, span metrics can provide in-depth monitoring of your system. The generated metrics will show application level insight into your monitoring, as far as tracing gets propagated through your applications. @@ -35,7 +30,10 @@ exemplars can be automatically added, providing additional value to these metric ## How to run -To enable span metrics in Tempo/GET, enable the metrics generator and add an overrides section which enables the `span-metrics` generator. See [here for configuration details]({{< relref "../configuration#metrics-generator" >}}). +To enable span metrics in Tempo or Grafana Enterprise Traces, enable the metrics generator and add an overrides section which enables the `span-metrics` processor. 
+Refer to [the configuration details]({{< relref "../configuration#metrics-generator" >}}). + +If you want to enable metrics-generator for your Grafana Cloud account, refer to the [Metrics-generator in Grafana Cloud](https://grafana.com/docs/grafana-cloud/send-data/traces/metrics-generator/) documentation. ## How it works diff --git a/docs/sources/tempo/operations/traceql-metrics.md b/docs/sources/tempo/operations/traceql-metrics.md index 5b541814813..93320260806 100644 --- a/docs/sources/tempo/operations/traceql-metrics.md +++ b/docs/sources/tempo/operations/traceql-metrics.md @@ -26,10 +26,14 @@ To use the metrics generated from traces, you need to: * Set the `local-blocks` processor to active in your `metrics-generator` configuration * Configure a Tempo data source in Grafana or Grafana Cloud -* Access Grafana Cloud or Grafana version 10.4 or newer +* Access Grafana Cloud or Grafana version 10.4 or later ## Activate and configure the `local-blocks` processor +The local-blocks processor must be enabled to start using metrics queries like `{ } | rate()`. +If not enabled, then the metrics queries fail with the error `localblocks processor not found`. +Enabling the `local-blocks` processor can be done either per tenant or in all tenants. + To activate the `local-blocks` processor for all users, add it to the list of processors in the `overrides` block of your Tempo configuration. ```yaml @@ -38,30 +42,87 @@ overrides: metrics_generator_processors: ['local-blocks'] ``` -To configure the processor per tenant, use the `metrics_generator.processor` override. +To configure the processor per tenant, use the `metrics_generator_processor` override. 
+ +Example of enabling the processor for a single tenant in the per-tenant overrides: + + ```yaml + overrides: + 'tenantID': + metrics_generator_processors: + - local-blocks + ``` + +By default, for all tenants in the main configuration: + + ```yaml + overrides: + defaults: + metrics_generator: + processors: [local-blocks] + ``` + +Add this configuration to run TraceQL metrics queries against all spans (and not just server spans): + +```yaml +metrics_generator: + processor: + local_blocks: + filter_server_spans: false +``` + +To run metrics queries on historical data, you must configure the local-blocks processor to flush RF1 blocks to object storage: + +```yaml +metrics_generator: + processor: + local_blocks: + flush_to_storage: true +``` + +Setting `flush_to_storage` to `true` ensures that metrics blocks are flushed to storage so TraceQL metrics queries can run against historical data. + +For more information about overrides, refer to [Standard overrides](https://grafana.com/docs/tempo//configuration/#standard-overrides). + + ```yaml + overrides: + 'tenantID': + metrics_generator_processors: + - local-blocks + ``` -For more information about overrides, refer to [Standard overrides]({{< relref "../configuration#standard-overrides" >}}). +By default, for all tenants in the main configuration: -### Configure the processor + ```yaml + overrides: + defaults: + metrics_generator: + processors: [local-blocks] + ``` -Next, configure the `local-blocks` processor to record all spans for TraceQL metrics. -Here is an example configuration: +Add this configuration to run TraceQL metrics queries against all spans (and not just server spans): ```yaml - metrics_generator: +metrics_generator: processor: local_blocks: filter_server_spans: false - storage: - path: /var/tempo/generator/wal - traces_storage: - path: /var/tempo/generator/traces ``` -If you configured Tempo using the `tempo-distributed` Helm chart, you can also set `traces_storage` using your `values.yaml` file. 
Refer to the [Helm chart for an example](https://github.com/grafana/helm-charts/blob/559ecf4a9c9eefac4521454e7a8066778e4eeff7/charts/tempo-distributed/values.yaml#L362). +If you configured Tempo using the `tempo-distributed` Helm chart, you can also set `traces_storage` using your `values.yaml` file. +Refer to the [Helm chart for an example](https://github.com/grafana/helm-charts/blob/559ecf4a9c9eefac4521454e7a8066778e4eeff7/charts/tempo-distributed/values.yaml#L362). +```yaml +metrics_generator: + processor: + local_blocks: + flush_to_storage: true +``` + +Setting `flush_to_storage` to `true` ensures that metrics blocks are flushed to storage so that TraceQL metrics queries can run against historical data. + +For more information about overrides, refer to [Standard overrides](https://grafana.com/docs/tempo//configuration/#standard-overrides). -Refer to the [metrics-generator configuration]({{< relref "../configuration#metrics-generator" >}}) documentation for more information. ## Evaluate query timeouts @@ -109,4 +170,4 @@ query_frontend: metrics: concurrent_jobs: 8 target_bytes_per_job: 1.25e+09 # ~1.25GB -``` \ No newline at end of file +``` diff --git a/docs/sources/tempo/release-notes/v2-6.md b/docs/sources/tempo/release-notes/v2-6.md index 889cf25d49a..6a753fd6a47 100644 --- a/docs/sources/tempo/release-notes/v2-6.md +++ b/docs/sources/tempo/release-notes/v2-6.md @@ -98,7 +98,16 @@ This improvement is a result of some of these changes: ### Other enhancements and improvements -This release also has these notable updates: +This release also has these notable updates. + +#### 2.6.1 + +* Register gRPC health server to tempo-query. [[PR 4178]](https://github.com/grafana/tempo/pull/4178) +* Support Tempo on IBM s390x. [[PR 4175]](https://github.com/grafana/tempo/pull/4175) +* tempo-query: Separate TLS settings for server and client. [[PR 4177]](https://github.com/grafana/tempo/pull/4177) +* Speed up tempo-query trace search by allowing parallel queries. 
[[PR 4159]](https://github.com/grafana/tempo/pull/4159) + +#### 2.6.0 * Bring back OTel receiver metrics. [[PR 3917](https://github.com/grafana/tempo/pull/3917)] * Add a `q` parameter to `/api/v2/search/tags` for tag name filtering. [[PR 3822](https://github.com/grafana/tempo/pull/3822)] @@ -118,11 +127,15 @@ When [upgrading](https://grafana.com/docs/tempo/latest/setup/upgrade/) to Tempo We've changed to an RF1 (Replication Factor 1) pattern for TraceQL metrics as we were unable to hit performance goals for RF3 de-duplication. This requires some operational changes to query TraceQL metrics. -TraceQL metrics are still considered experimental. We hope to mark them GA soon when we productionize a complete RF1 write-read path. [PRs [3628](https://github.com/grafana/tempo/pull/3628), [3691]([https://github.com/grafana/tempo/pull/3691](https://github.com/grafana/tempo/pull/3691)), [3723]([https://github.com/grafana/tempo/pull/3723](https://github.com/grafana/tempo/pull/3723)), [3995]([https://github.com/grafana/tempo/pull/3995](https://github.com/grafana/tempo/pull/3995))] +TraceQL metrics are still considered experimental. +We hope to mark them GA soon when we productionize a complete RF1 write-read path. +[PRs [3628](https://github.com/grafana/tempo/pull/3628), [3691]([https://github.com/grafana/tempo/pull/3691](https://github.com/grafana/tempo/pull/3691)), [3723]([https://github.com/grafana/tempo/pull/3723](https://github.com/grafana/tempo/pull/3723)), [3995]([https://github.com/grafana/tempo/pull/3995](https://github.com/grafana/tempo/pull/3995))] **For recent data** -The local-blocks processor must be enabled to start using metrics queries like `{ } | rate()`. If not enabled metrics queries fail with the error `localblocks processor not found`. Enabling the local-blocks processor can be done either per tenant or in all tenants. +The local-blocks processor must be enabled to start using metrics queries like `{ } | rate()`. 
+If not enabled metrics queries fail with the error `localblocks processor not found`. +Enabling the local-blocks processor can be done either per tenant or in all tenants. * Per-tenant in the per-tenant overrides: @@ -228,10 +241,20 @@ Storage:
+### Other breaking changes + +* **BREAKING CHANGE** tempo-query is no longer a Jaeger instance with grpcPlugin. It's now a standalone server, serving a gRPC API for Jaeger on `0.0.0.0:7777` by default. [[PR 3840]](https://github.com/grafana/tempo/issues/3840) + ## Bugfixes For a complete list, refer to the [Tempo changelog](https://github.com/grafana/tempo/releases). +### 2.6.1 + +* Bring back application-json content-type header. [[PR 4123]](https://github.com/grafana/tempo/pull/4123) + +### 2.6.0 + * Fix panic in certain metrics queries using `rate()` with `by`. [[PR 3847](https://github.com/grafana/tempo/pull/3847)] * Fix metrics queries when grouping by attributes that may not exist. [[PR 3734](https://github.com/grafana/tempo/pull/3734)] * Fix metrics query histograms and quantiles on `traceDuration`. [[PR 3879](https://github.com/grafana/tempo/pull/3879)] diff --git a/docs/sources/tempo/setup/upgrade.md b/docs/sources/tempo/setup/upgrade.md index 650632e9caa..3046a3f1f0d 100644 --- a/docs/sources/tempo/setup/upgrade.md +++ b/docs/sources/tempo/setup/upgrade.md @@ -22,7 +22,7 @@ You can check your configuration options using the [`status` API endpoint]({{< r ## Upgrade to Tempo 2.6 -Tempo 2.5 has several considerations for any upgrade: +Tempo 2.6 has several considerations for any upgrade: * Operational change for TraceQL metrics * vParquet4 is now the default block format @@ -145,6 +145,11 @@ For information on upgrading, refer to [Upgrade to Tempo 2.6](https://grafana.co +### tempo-query is a standalone server + +With Tempo 2.6.1, tempo-query is no longer a Jaeger instance with grpcPlugin. +It's now a standalone server +that serves a gRPC API for Jaeger on `0.0.0.0:7777` by default. 
[PR 3840] ## Upgrade to Tempo 2.5 diff --git a/docs/sources/tempo/traceql/_index.md b/docs/sources/tempo/traceql/_index.md index 47bc28b5c19..cc97015b423 100644 --- a/docs/sources/tempo/traceql/_index.md +++ b/docs/sources/tempo/traceql/_index.md @@ -47,7 +47,8 @@ The [GRPC streaming API endpoint]({{< relref "../api_docs#tempo-grpc-api" >}}) i The `tempo-cli` also uses this streaming endpoint. For more information, refer to the [Tempo CLI documentation]({{< relref "../operations/tempo_cli#query-api-command" >}}). -To use streaming in Grafana, you must first enable the `traceQLStreaming` feature toggle. +To use streaming in Grafana, you must have `stream_over_http_enabled: true` enabled in Tempo. +For information, refer to [Tempo GRPC API](https://grafana.com/docs/tempo/latest/api_docs/#tempo-grpc-api). ## Construct a TraceQL query @@ -416,6 +417,12 @@ For example, find traces that have more than 3 spans with an attribute `http.sta { span.http.status_code = 200 } | count() > 3 ``` +To find spans where the total of a made-up attribute `bytesProcessed` was more than 1 GB: + +``` +{ } | sum(span.bytesProcessed) > 1000000000 +``` + ## Grouping TraceQL supports a grouping pipeline operator that can be used to group by arbitrary attributes. This can be useful to diff --git a/docs/sources/tempo/traceql/metrics-queries/_index.md b/docs/sources/tempo/traceql/metrics-queries/_index.md new file mode 100644 index 00000000000..9f4c2a2fe77 --- /dev/null +++ b/docs/sources/tempo/traceql/metrics-queries/_index.md @@ -0,0 +1,87 @@ +--- +title: TraceQL metrics queries +menuTitle: TraceQL metrics queries +description: Learn about TraceQL metrics queries +weight: 600 +keywords: + - metrics query + - TraceQL metrics +--- + +# TraceQL metrics queries + +{{< docs/experimental product="TraceQL metrics" >}} + +TraceQL metrics is an experimental feature in Grafana Tempo that creates metrics from traces. + +Metric queries extend trace queries by applying a function to trace query results. 
+This powerful feature allows for ad hoc aggregation of any existing TraceQL query by any dimension available in your traces, much in the same way that LogQL metric queries create metrics from logs. + +Traces are a unique observability signal that contain causal relationships between the components in your system. + +TraceQL metrics can help answer questions like this: + +* How many database calls across all systems are downstream of your application? +* What services beneath a given endpoint are currently failing? +* What services beneath an endpoint are currently slow? + +TraceQL metrics can help you answer these questions by parsing your traces in aggregate. + +TraceQL metrics are powered by the [TraceQL metrics API](https://grafana.com/docs/tempo//api_docs/#traceql-metrics). + +![Metrics visualization in Grafana](/media/docs/tempo/metrics-explore-sample-2.4.png) + +## RED metrics, TraceQL, and PromQL + +RED is an acronym for three types of metrics: + +- Rate, the number of requests per second +- Errors, the number of those requests that are failing +- Duration, the amount of time those requests take + +For more information about the RED method, refer to [The RED Method: how to instrument your services](/blog/2018/08/02/the-red-method-how-to-instrument-your-services/). + +You can write TraceQL metrics queries to compute rate, errors, and durations over different groups of spans. + +For more information on how to use TraceQL metrics to investigate issues, refer to [Solve problems with metrics queries](./solve-problems-metrics-queries). + +## Enable and use TraceQL metrics + +To use TraceQL metrics, you need to enable them on your Tempo database. +Refer to [Configure TraceQL metrics](https://grafana.com/docs/tempo//operations/traceql-metrics/) for more information. + +From there, you can either query the TraceQL metrics API directly (for example, with `curl`) or using Grafana +(recommended). 
+To run TraceQL metrics queries in Grafana, you need Grafana Cloud or Grafana 10.4 or later. +No extra configuration is needed. +Use a Tempo data source that points to a Tempo database with TraceQL metrics enabled. + +Refer to [Solve problems using metrics queries](./solve-problems-metrics-queries/) for some real-world examples. + +### Functions + +TraceQL metrics queries currently include the following functions for aggregating over groups of spans: `rate`, `count_over_time`, `quantile_over_time`, `histogram_over_time`, and `compare`. +These functions can be added as an operator at the end of any TraceQL query. + +For detailed information and example queries for each function, refer to [TraceQL metrics functions](./functions). + +### Exemplars + +Exemplars are a powerful feature of TraceQL metrics. +They allow you to see an exact trace that contributed to a given metric value. +This is particularly useful when you want to understand why a given metric is high or low. + +Exemplars are available in TraceQL metrics for all range queries. +To get exemplars, you need to configure it in the query-frontend with the parameter `query_frontend.metrics.max_exemplars`, +or pass a query hint in your query. + +Example: + +``` +{ span:name = "GET /:endpoint" } | quantile_over_time(duration, .99) by (span.http.target) with (exemplars=true) +``` + +{{< admonition type="note" >}} +TraceQL metric queries with exemplars aren't fully supported in Grafana Explore. +They will be supported in a future Grafana release. 
+{{< /admonition >}} diff --git a/docs/sources/tempo/traceql/metrics-queries.md b/docs/sources/tempo/traceql/metrics-queries/functions.md similarity index 53% rename from docs/sources/tempo/traceql/metrics-queries.md rename to docs/sources/tempo/traceql/metrics-queries/functions.md index 52b44ecdeec..2c20a34674f 100644 --- a/docs/sources/tempo/traceql/metrics-queries.md +++ b/docs/sources/tempo/traceql/metrics-queries/functions.md @@ -1,69 +1,34 @@ --- -title: TraceQL metrics queries -menuTitle: TraceQL metrics queries -description: Learn about TraceQL metrics queries +title: TraceQL metrics functions +menuTitle: TraceQL metrics functions +description: Learn about functions used in TraceQL metrics queries weight: 600 keywords: - metrics query - TraceQL metrics --- -# TraceQL metrics queries +# TraceQL metrics functions -{{< docs/experimental product="TraceQL metrics" >}} + -TraceQL metrics is an experimental feature in Grafana Tempo that creates metrics from traces. +TraceQL supports `rate`, `count_over_time`, `quantile_over_time`, `histogram_over_time`, and `compare` functions. -Metric queries extend trace queries by applying a function to trace query results. -This powerful feature allows for adhoc aggregation of any existing TraceQL query by any dimension available in your traces, much in the same way that LogQL metric queries create metrics from logs. +## Available functions -Traces are a unique observability signal that contain causal relationships between the components in your system. -Do you want to know how many database calls across all systems are downstream of your application? -What services beneath a given endpoint are currently failing? -What services beneath an endpoint are currently slow? TraceQL metrics can answer all these questions by parsing your traces in aggregate. 
- -![Metrics visualization in Grafana](/media/docs/tempo/metrics-explore-sample-2.4.png) - -## Enable and use TraceQL metrics - -You can use the TraceQL metrics in Grafana with any existing or new Tempo data source. -This capability is available in Grafana Cloud and Grafana (10.4 and newer). - -To enable TraceQL metrics, refer to [Configure TraceQL metrics](https://grafana.com/docs/tempo/latest/operations/traceql-metrics/) for more information. - -## Exemplars - -Exemplars are a powerful feature of TraceQL metrics. -They allow you to see an exact trace that contributed to a given metric value. -This is particularly useful when you want to understand why a given metric is high or low. - -Exemplars are available in TraceQL metrics for all functions. -To get exemplars, you need to configure it in the query-frontend with the parameter `query_frontend.metrics.exemplars`, -or pass a query hint in your query. - -``` -{ name = "GET /:endpoint" } | quantile_over_time(duration, .99) by (span.http.target) with (exemplars=true) -``` - -## Functions - -TraceQL supports include `rate`, `count_over_time`, `quantile_over_time`, and `histogram_over_time` functions. These functions can be added as an operator at the end of any TraceQL query. `rate` : Calculates the number of matching spans per second `count_over_time` -: Counts the number of matching spans per time interval (see the `step` API parameter) +: Counts the number of matching spans per time interval (refer to the [`step` API parameter](https://grafana.com/docs/tempo//api_docs/#traceql-metrics)). 
`min_over_time` : Returns the minimum value of matching spans values per time interval (see the `step` API parameter) `max_over_time` -: Returns the maximum value of matching spans values per time interval (see the `step` API parameter) - -`avg_over_time` -: Returns the average value of matching spans values per time interval (see the `step` API parameter) +: Returns the maximum value for the specified attribute across all matching spans per time interval (refer to the [`step` API parameter](https://grafana.com/docs/tempo//api_docs/#traceql-metrics)). `quantile_over_time` : The quantile of the values in the specified interval @@ -74,14 +39,27 @@ These functions can be added as an operator at the end of any TraceQL query. `compare` : Used to split the stream of spans into two groups: a selection and a baseline. The function returns time-series for all attributes found on the spans to highlight the differences between the two groups. -### The `rate` function +## The `rate` function + +The `rate` function calculates the number of spans per second that match the given span selectors. + +### Parameters + +None. + +### Examples The following query shows the rate of errors by service and span name. +This is a TraceQL-specific way of gathering rate metrics that would otherwise be generated by the span metrics processor. + +For example, this query: ``` { status = error } | rate() by (resource.service.name, name) ``` +This is equivalent to using span-generated metrics and running the query. + This example calculates the rate of the erroring spans coming from the service `foo`. Rate is a `spans/sec` quantity. @@ -95,57 +73,74 @@ Combined with the `by()` operator, this can be even more powerful. 
{ resource.service.name = "foo" && status = error } | rate() by (span.http.route) ``` -This example still rates the erroring spans in the service `foo` but the metrics have been broken +This example still rates the erroring spans in the service `foo` but the metrics are broken down by HTTP route. This might let you determine that `/api/sad` had a higher rate of erroring spans than `/api/happy`, for example. -### The `count_over_time`, `min_over_time`, `max_over_time` and `avg_over_time` functions +## The `count_over_time` function -The `count_over_time()` let you counts the number of matching spans per time interval. +The `count_over_time()` function counts the number of matching spans per time interval. +The time interval that the count will be computed over is set by the `step` parameter. +For more information, refer to the [`step` API parameter](https://grafana.com/docs/tempo//api_docs/#traceql-metrics). -``` -{ name = "GET /:endpoint" } | count_over_time() by (span.http.status_code) -``` +### Example -The `min_over_time()` let you aggregate numerical values by computing the minimum value of them, such as the all important span duration. +This example counts the number of spans with name `"GET /:endpoint"` broken down by status code. You might see that there are 10 `"GET /:endpoint"` spans with status code 200 and 15 `"GET /:endpoint"` spans with status code 400. ``` -{ name = "GET /:endpoint" } | min_over_time(duration) by (span.http.target) +{ name = "GET /:endpoint" } | count_over_time() by (span.http.status_code) + ``` -Any numerical attribute on the span is fair game. +## The `min_over_time` and `max_over_time` functions -``` -{ name = "GET /:endpoint" } | min_over_time(span.http.status_code) -``` +The `min_over_time()` function lets you aggregate numerical attributes by calculating their minimum value. 
+For example, you could choose to calculate the minimum duration of a group of spans, or you could choose to calculate the minimum value of a custom attribute you've attached to your spans, like `span.shopping.cart.entries`. +The time interval that the minimum is computed over is set by the `step` parameter. The `max_over_time()` let you aggregate numerical values by computing the maximum value of them, such as the all important span duration. +The time interval that the maximum is computed over is set by the `step` parameter. + +For more information, refer to the [`step` API parameter](https://grafana.com/docs/tempo//api_docs/#traceql-metrics). + +### Parameters + +Numerical field that you want to calculate the minimum or maximum of. + +### Examples + +This example computes the minimum duration for each `http.target` of all spans named `"GET /:endpoint"`. +Any numerical attribute on the span is fair game. ``` -{ name = "GET /:endpoint" } | max_over_time(duration) by (span.http.target) +{ name = "GET /:endpoint" } | min_over_time(duration) by (span.http.target) ``` +This example computes the minimum status code value of all spans named `"GET /:endpoint"`. + ``` -{ name = "GET /:endpoint" } | max_over_time(span.http.status_code) +{ name = "GET /:endpoint" } | min_over_time(span.http.status_code) ``` -The `avg_over_time()` let you aggregate numerical values by computing the average value of them, such as the all important span duration. +This example computes the maximum duration for each `http.target` of all spans named `"GET /:endpoint"`. 
``` -{ name = "GET /:endpoint" } | avg_over_time(duration) by (span.http.target) +{ name = "GET /:endpoint" } | max_over_time(duration) by (span.http.target) ``` ``` -{ name = "GET /:endpoint" } | avg_over_time(event:cpu_seconds_tota) +{ name = "GET /:endpoint" } | max_over_time(span.http.response.size) ``` -### The `quantile_over_time` and `histogram_over_time` functions +## The `quantile_over_time` and `histogram_over_time` functions The `quantile_over_time()` and `histogram_over_time()` functions let you aggregate numerical values, such as the all important span duration. You can specify multiple quantiles in the same query. +The example below computes the 99th, 90th, and 50th percentile of the duration attribute on all spans with name `GET /:endpoint`. + ``` { name = "GET /:endpoint" } | quantile_over_time(duration, .99, .9, .5) ``` @@ -164,21 +159,35 @@ To demonstrate this flexibility, consider this nonsensical quantile on `span.htt { name = "GET /:endpoint" } | quantile_over_time(span.http.status_code, .99, .9, .5) ``` -### The `compare` function +This computes the 99th, 90th, and 50th percentile of the values of the `status_code` attribute for all spans named `GET /:endpoint`. +This is unlikely to tell you anything useful (what does a median status code of `347` mean?), but it works. + +As a further example, imagine a custom attribute like `span.temperature`. +You could use a similar query to know what the 50th percentile and 95th percentile temperatures were across all your spans. -This adds a new metrics function `compare` which is used to split the stream of spans into two groups: a selection and a baseline. +## The `compare` function + +The `compare` function is used to split a set of spans into two groups: a selection and a baseline. It returns time-series for all attributes found on the spans to highlight the differences between the two groups. 
-This is a powerful function that is best understood by looking at example outputs below: -The function is used like other metrics functions: when it's placed after any search query, and converts it into a metrics query: +This is a powerful function that's best understood by using the [**Comparison** tab in Explore Traces](https://grafana.com/docs/grafana//explore/simplified-exploration/traces/investigate/#comparison). +You can also understand this function by looking at the example outputs below. + +The function is used like other metrics functions: when it's placed after any trace query, it converts the query into a metrics query: `...any spanset pipeline... | compare({subset filters}, , , )` Example: + ``` { resource.service.name="a" && span.http.path="/myapi" } | compare({status=error}) ``` -This function is generally run as an instant query. It may return may exceed gRPC payloads when run as a query range. -#### Parameters + +This function is generally run as an instant query. +An instant query gives a single value at the end of the selected time range. +[Instant queries](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries) are quicker to execute and it's often easier to understand their results. +The results may exceed gRPC payload limits when run as a range query. + +### Parameters The `compare` function has four parameters: @@ -188,7 +197,7 @@ The `compare` function has four parameters: 3. Optional. Start and End timestamps in Unix nanoseconds, which can be used to constrain the selection window by time, in addition to the filter. For example, the overall query could cover the past hour, and the selection window only a 5 minute time period in which there was an anomaly. These timestamps must both be given, or neither. -#### Output +### Output The outputs are flat time-series for each attribute/value found in the spans. 
diff --git a/docs/sources/tempo/traceql/metrics-queries/solve-problems-metrics-queries.md b/docs/sources/tempo/traceql/metrics-queries/solve-problems-metrics-queries.md new file mode 100644 index 00000000000..7f2c65730c8 --- /dev/null +++ b/docs/sources/tempo/traceql/metrics-queries/solve-problems-metrics-queries.md @@ -0,0 +1,67 @@ +--- +title: Solve problems with trace metrics queries +menuTitle: Use cases +description: Solve problems with trace metrics queries +weight: 600 +keywords: + - metrics query + - TraceQL metrics +--- + +# Solve problems with trace metrics queries + +You can query data generated by TraceQL metrics in a similar way to how you would query results stored in Prometheus, Grafana Mimir, or another Prometheus-compatible time series database (TSDB). +TraceQL metrics queries allow you to calculate metrics on trace span data on the fly with Tempo (your tracing database), without requiring a time series database like Prometheus. + +This page provides an example of how you can investigate the rate of incoming requests using both PromQL and TraceQL. + +## RED metrics and queries + +The Tempo metrics-generator emits metrics with pre-configured labels for Rate, Error, and Duration (RED) metrics and service graph edges. +Generated metric labels vary, but always include the service name (in service graph metrics, as a client and/or a server type). +For more information, refer to the [metrics-generator documentation](../../metrics-generator/). + +You can use these metrics to get an overview of application performance. +The metrics can be directly correlated to the trace spans that are available for querying. + +TraceQL metrics allow a user to query metrics from traces directly from Tempo instead of requiring the metrics-generator component and an accompanying TSDB. + +{{< admonition type="note" >}} +TraceQL metrics are constrained to a 24-hour range window, and aren't available as a Grafana Managed Alerts source. 
For any metrics that you want to query over longer time ranges, use for alerting, or retain for more than 30 days, use the metrics-generator to store these metrics in Prometheus, Mimir, or another Prometheus-compatible TSDB and continue to use PromQL for querying. +{{< /admonition >}} + +## Investigate the rate of incoming requests + +Let's say that you want to know how many requests are being serviced both by your application and by each service that comprises your application. +This allows you to ensure that your application scales appropriately, can help with capacity planning, and can show you which services may be having problems and are taking up load in fail-over scenarios. +In PromQL, these values are calculated over counters that increase each time a service is called. These metrics provide the Rate (R) in RED. + +If you are familiar with PromQL, then you're used to constructing queries. +You can create equivalent queries in TraceQL. +Here are the two queries for the different data sources (PromQL for Mimir and TraceQL for Tempo), shown side by side over a 6-hour time range. + +![Equivalent PromQL and TraceQL queries](/media/docs/tempo/traceql/TraceQL-metrics-query-example-1.png) + +### How the query looks in PromQL + +The Tempo metrics-generator outputs a metric, `traces_spanmetrics_calls_total`, a counter that increases each time a named span in a service is called. +RED data generated by the metrics-generator includes the service name and span kind. +You can use this to only show call counts when a service was called externally by filtering via the `SERVER` span kind, thus showing the total number of times the service has been called. + +You can use the PromQL `rate()` and `sum()` functions to examine the counter and determine the per-second rate of calls occurring, summing them by each service. +In addition to only looking at spans of `kind=server`, you can also focus on spans coming from a particular Kubernetes namespace (`ditl-demo-prod`). 
+ +``` +sum by (service_name)(rate(traces_spanmetrics_calls_total{service_namespace="ditl-demo-prod", span_kind="SPAN_KIND_SERVER"}[2m])) +``` + +### How the query looks in TraceQL + +TraceQL metrics queries let you similarly examine a particular subset of your spans. +As in the example above, you can start by filtering down to spans that occur in a particular Kubernetes namespace (`ditl-demo-prod`), and are of kind `SERVER`. +That resulting set of spans is piped to the TraceQL `rate` function, which then calculates the rate (in spans/sec) at which spans matching your filters are received. +By adding the `by (resource.service.name)` term, the query returns spans-per-second rates per service, rather than an aggregate across all services. + +``` +{ resource.service.namespace="ditl-demo-prod" && kind=server } | rate() by (resource.service.name) +``` diff --git a/example/docker-compose/local/docker-compose.yaml b/example/docker-compose/local/docker-compose.yaml index ee8816aac9a..f8172d2417d 100644 --- a/example/docker-compose/local/docker-compose.yaml +++ b/example/docker-compose/local/docker-compose.yaml @@ -59,13 +59,13 @@ services: - "9090:9090" grafana: - image: grafana/grafana:11.0.0 + image: grafana/grafana:11.2.0 volumes: - ../shared/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml environment: - GF_AUTH_ANONYMOUS_ENABLED=true - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin - GF_AUTH_DISABLE_LOGIN_FORM=true - - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor traceQLStreaming metricsSummary + - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor metricsSummary ports: - "3000:3000" diff --git a/example/docker-compose/local/readme.md b/example/docker-compose/local/readme.md index 27e817685ec..be791aeeb9e 100644 --- a/example/docker-compose/local/readme.md +++ b/example/docker-compose/local/readme.md @@ -1,12 +1,12 @@ -## Local Storage +## Local storage -In this example all data is stored locally in the `tempo-data` folder. 
Local +In this example, all data is stored locally in the `tempo-data` folder. Local storage is fine for experimenting with Tempo or when using the single binary, -but does not work in a distributed/microservices scenario. The container runs +but doesn't work in a distributed/microservices scenario. The container runs as non-root, and so the local directory needs to have the appropriate -permissions set in order to start correctly. +permissions set to start correctly. -1.Create the storage directory with the correct permissions and start up the local stack. +1. Create the storage directory with the correct permissions and start up the local stack. ```console mkdir tempo-data/ @@ -19,29 +19,30 @@ At this point, the following containers should be spun up: docker compose ps ``` ``` - Name Command State Ports + Name Command State Ports ----------------------------------------------------------------------------------------------------------- -local_grafana_1 /run.sh Up 0.0.0.0:3000->3000/tcp,:::3000->3000/tcp -local_k6-tracing_1 /k6-tracing run /example-s ... Up -local_prometheus_1 /bin/prometheus --config.f ... Up 0.0.0.0:9090->9090/tcp,:::9090->9090/tcp -local_tempo_1 /tempo -config.file=/etc/t ... Up 0.0.0.0:14268->14268/tcp,:::14268->14268/tcp, - 0.0.0.0:3200->3200/tcp,:::3200->3200/tcp, - 0.0.0.0:4317->4317/tcp,:::4317->4317/tcp, - 0.0.0.0:4318->4318/tcp,:::4318->4318/tcp, - 0.0.0.0:9411->9411/tcp,:::9411->9411/tcp +local_grafana_1 /run.sh Up 0.0.0.0:3000->3000/tcp,:::3000->3000/tcp +local_k6-tracing_1 /k6-tracing run /example-s ... Up +local_prometheus_1 /bin/prometheus --config.f ... Up 0.0.0.0:9090->9090/tcp,:::9090->9090/tcp +local_tempo_1 /tempo -config.file=/etc/t ... Up 0.0.0.0:14268->14268/tcp,:::14268->14268/tcp, + 0.0.0.0:3200->3200/tcp,:::3200->3200/tcp, + 0.0.0.0:4317->4317/tcp,:::4317->4317/tcp, + 0.0.0.0:4318->4318/tcp,:::4318->4318/tcp, + 0.0.0.0:9411->9411/tcp,:::9411->9411/tcp ``` -2. 
If you're interested you can see the wal/blocks as they are being created. +2. If you're interested, you can see the wal/blocks as they are being created. ```console ls tempo-data/ ``` -3. Navigate to [Grafana](http://localhost:3000/explore) select the Tempo data source and use the "Search" +3. Navigate to [Grafana](http://localhost:3000/explore), select the Tempo data source, and use the **Search** tab to find traces. Also notice that you can query Tempo metrics from the Prometheus data source setup in Grafana. -4. Tail logs of a container (eg: tempo) +4. Tail logs of a container (for example, tempo): + ```bash docker logs local_tempo_1 -f ``` @@ -52,11 +53,12 @@ docker logs local_tempo_1 -f docker compose down -v ``` -## search streaming over http +## Search streaming over HTTP -- need to set `traceQLStreaming` feature flag in Grafana -- need to enable `stream_over_http_enabled` in tempo by setting `stream_over_http_enabled: true` in the config file. +- You need to enable `stream_over_http_enabled` in Tempo by setting `stream_over_http_enabled: true` in the configuration file. -you can use Grafana or tempo-cli to make a query. +You can use Grafana or `tempo-cli` to make a query. 
+```console tempo-cli: `$ tempo-cli query api search "0.0.0.0:3200" --use-grpc "{}" "2023-12-05T08:11:18Z" "2023-12-05T08:12:18Z" --org-id="test"` +``` \ No newline at end of file diff --git a/example/docker-compose/local/tempo.yaml b/example/docker-compose/local/tempo.yaml index b7399e1c87b..ddd3348b1d4 100644 --- a/example/docker-compose/local/tempo.yaml +++ b/example/docker-compose/local/tempo.yaml @@ -17,6 +17,9 @@ query_frontend: search: duration_slo: 5s throughput_bytes_slo: 1.073741824e+09 + metadata_slo: + duration_slo: 5s + throughput_bytes_slo: 1.073741824e+09 trace_by_id: duration_slo: 5s diff --git a/example/docker-compose/multi-tenant/docker-compose.yaml b/example/docker-compose/multi-tenant/docker-compose.yaml index b7f2aa34417..bb45669305e 100644 --- a/example/docker-compose/multi-tenant/docker-compose.yaml +++ b/example/docker-compose/multi-tenant/docker-compose.yaml @@ -49,7 +49,7 @@ services: - tempo grafana: - image: grafana/grafana:11.0.0 + image: grafana/grafana:11.2.0 volumes: - ./grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml environment: @@ -57,6 +57,6 @@ services: - GF_AUTH_ANONYMOUS_ENABLED=true - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin - GF_AUTH_DISABLE_LOGIN_FORM=true - - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor traceQLStreaming metricsSummary + - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor metricsSummary ports: - "3000:3000" diff --git a/example/docker-compose/multi-tenant/readme.md b/example/docker-compose/multi-tenant/readme.md index 0f514660d60..5d587d88371 100644 --- a/example/docker-compose/multi-tenant/readme.md +++ b/example/docker-compose/multi-tenant/readme.md @@ -1,6 +1,7 @@ ## Local Storage -In this example all data is stored locally in the `tempo-data` folder. Local storage is fine for experimenting with Tempo -or when using the single binary, but does not work in a distributed/microservices scenario. + +In this example, all data is stored locally in the `tempo-data` folder. 
Local storage is fine for experimenting with Tempo +or when using the single binary, but doesn't work in a distributed or microservices scenario. 1. Start up the local stack. @@ -16,7 +17,7 @@ At this point, the following containers should be spun up: ```console $ docker compose ps - Name Command State Ports + Name Command State Ports ------------------------------------------------------------------------------------------------------------------------------------------------------------ multi-tenant_grafana_1 /run.sh Up 0.0.0.0:3000->3000/tcp,:::3000->3000/tcp multi-tenant_k6-tracing-2_1 /k6-tracing run /example-s ... Up @@ -27,7 +28,7 @@ multi-tenant_tempo_1 /tempo -config.file=/etc/t ... Up 0.0.0.0:1 ``` -2. If you're interested you can see the wal/blocks as they are being created. +2. If you're interested, you can see the wal/blocks as they are being created. ```console $ ls tempo-data/ @@ -37,29 +38,31 @@ $ ls tempo-data/ tab to find traces. Also notice that you can query Tempo metrics from the Prometheus data source setup in Grafana. -4. Tail logs of a container (eg: tempo) +4. Tail logs of a container (for example, tempo): ```bash $ docker logs multi-tenant_tempo_1 -f ``` -5. To stop the setup use - +5. To stop the setup, use the following command: ```console docker compose down -v ``` -## streaming and multi-tenant search +## Streaming and multi-tenant search -- needs `traceQLStreaming` feature flag set in Grafana, see `docker-compose.yaml` -- needs `stream_over_http_enabled: true`, `multitenancy_enabled: true`, -and `query_frontend.multi_tenant_queries_enabled: true` in the tempo config file, see `tempo.yaml` +- Needs `stream_over_http_enabled: true`, `multitenancy_enabled: true`, +and `query_frontend.multi_tenant_queries_enabled: true` in the Tempo configuration file, see `tempo.yaml` You can use Grafana or tempo-cli to make a query. 
-**grpc streaming query using tempo-cli** +**gRPC streaming query using tempo-cli** - `$ tempo-cli query api search "0.0.0.0:3200" --use-grpc --limit 10000 "{}" "2023-12-05T08:11:18Z" "2023-12-05T08:12:18Z" --org-id="test"` -**multi-tenant streaming queries using tempo-cli** -- pass multiple tenant ids with `|` like this `--org-id="test|test2"` +**Multi-tenant streaming queries using tempo-cli** +- Pass multiple tenant ids with `|` like this `--org-id="test|test2"` -example: `$ ./bin/linux/tempo-cli-amd64 query api search "0.0.0.0:3200" --use-grpc --limit 10000 "{ true } >> { true }" "2024-01-15T11:00:00Z" "2024-01-19T12:30:00Z" --org-id="test|test2"` +Example: +``` +$ ./bin/linux/tempo-cli-amd64 query api search "0.0.0.0:3200" --use-grpc --limit 10000 "{ true } >> { true }" "2024-01-15T11:00:00Z" "2024-01-19T12:30:00Z" --org-id="test|test2" +``` diff --git a/example/docker-compose/multi-tenant/tempo.yaml b/example/docker-compose/multi-tenant/tempo.yaml index bad3992a2d1..655f56b04b8 100644 --- a/example/docker-compose/multi-tenant/tempo.yaml +++ b/example/docker-compose/multi-tenant/tempo.yaml @@ -10,6 +10,9 @@ query_frontend: search: duration_slo: 5s throughput_bytes_slo: 1.073741824e+09 + metadata_slo: + duration_slo: 5s + throughput_bytes_slo: 1.073741824e+09 trace_by_id: duration_slo: 5s diff --git a/example/docker-compose/otel-collector-multitenant/docker-compose.yaml b/example/docker-compose/otel-collector-multitenant/docker-compose.yaml index ed6cf59e4fe..b84c6bc2dd1 100644 --- a/example/docker-compose/otel-collector-multitenant/docker-compose.yaml +++ b/example/docker-compose/otel-collector-multitenant/docker-compose.yaml @@ -45,7 +45,7 @@ services: prometheus: image: prom/prometheus:latest - command: + command: - --config.file=/etc/prometheus.yaml - --web.enable-remote-write-receiver - --enable-feature=exemplar-storage @@ -56,13 +56,13 @@ services: - "9090:9090" grafana: - image: grafana/grafana:11.0.0 + image: grafana/grafana:11.2.0 volumes: - 
./grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml environment: - GF_AUTH_ANONYMOUS_ENABLED=true - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin - GF_AUTH_DISABLE_LOGIN_FORM=true - - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor traceQLStreaming metricsSummary + - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor metricsSummary ports: - "3000:3000" diff --git a/example/docker-compose/shared/grafana-datasources.yaml b/example/docker-compose/shared/grafana-datasources.yaml index 4a3bc2c4e74..616406b0574 100644 --- a/example/docker-compose/shared/grafana-datasources.yaml +++ b/example/docker-compose/shared/grafana-datasources.yaml @@ -28,3 +28,6 @@ datasources: httpMethod: GET serviceMap: datasourceUid: prometheus + streamingEnabled: + search: true + diff --git a/example/docker-compose/shared/tempo.yaml b/example/docker-compose/shared/tempo.yaml index b1ab5ce7607..da019fdec19 100644 --- a/example/docker-compose/shared/tempo.yaml +++ b/example/docker-compose/shared/tempo.yaml @@ -7,6 +7,9 @@ query_frontend: search: duration_slo: 5s throughput_bytes_slo: 1.073741824e+09 + metadata_slo: + duration_slo: 5s + throughput_bytes_slo: 1.073741824e+09 trace_by_id: duration_slo: 5s diff --git a/go.mod b/go.mod index 75062ceae7b..63ef9c95b95 100644 --- a/go.mod +++ b/go.mod @@ -27,13 +27,13 @@ require ( github.com/gorilla/mux v1.8.1 github.com/grafana/dskit v0.0.0-20240801171758-736c44c85382 github.com/grafana/e2e v0.1.1 - github.com/hashicorp/go-hclog v1.6.3 - github.com/hashicorp/go-plugin v1.6.0 + github.com/hashicorp/go-hclog v1.6.3 // indirect + github.com/hashicorp/go-plugin v1.6.0 // indirect github.com/jaegertracing/jaeger v1.57.0 github.com/jedib0t/go-pretty/v6 v6.2.4 github.com/json-iterator/go v1.1.12 github.com/jsternberg/zap-logfmt v1.2.0 - github.com/klauspost/compress v1.17.9 + github.com/klauspost/compress v1.17.11 github.com/minio/minio-go/v7 v7.0.70 github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c github.com/olekukonko/tablewriter v0.0.5 
@@ -61,13 +61,13 @@ require ( go.opentelemetry.io/collector/consumer v0.102.1 go.opentelemetry.io/collector/pdata v1.12.0 go.opentelemetry.io/collector/semconv v0.105.0 // indirect - go.opentelemetry.io/otel v1.30.0 + go.opentelemetry.io/otel v1.31.0 go.opentelemetry.io/otel/bridge/opencensus v1.27.0 go.opentelemetry.io/otel/bridge/opentracing v1.26.0 go.opentelemetry.io/otel/exporters/jaeger v1.17.0 - go.opentelemetry.io/otel/metric v1.30.0 - go.opentelemetry.io/otel/sdk v1.28.0 - go.opentelemetry.io/otel/trace v1.30.0 + go.opentelemetry.io/otel/metric v1.31.0 + go.opentelemetry.io/otel/sdk v1.31.0 + go.opentelemetry.io/otel/trace v1.31.0 go.uber.org/atomic v1.11.0 go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 @@ -98,7 +98,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver v0.102.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/opencensusreceiver v0.102.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/zipkinreceiver v0.102.0 - github.com/parquet-go/parquet-go v0.23.0 + github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe github.com/stoewer/parquet-cli v0.0.7 go.opentelemetry.io/collector/config/configgrpc v0.102.1 go.opentelemetry.io/collector/config/confighttp v0.102.1 @@ -133,7 +133,7 @@ require ( github.com/alecthomas/participle/v2 v2.1.1 // indirect github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a // indirect - github.com/andybalholm/brotli v1.1.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect github.com/apache/thrift v0.20.0 // indirect github.com/armon/go-metrics v0.4.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect @@ -216,7 +216,7 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect - 
github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/miekg/dns v1.1.61 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect @@ -260,7 +260,6 @@ require ( github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect - github.com/segmentio/encoding v0.4.0 // indirect github.com/sercand/kuberesolver/v5 v5.1.1 // indirect github.com/shirou/gopsutil/v3 v3.24.4 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect @@ -321,7 +320,7 @@ require ( golang.org/x/crypto v0.25.0 // indirect golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect golang.org/x/mod v0.19.0 // indirect - golang.org/x/sys v0.22.0 // indirect + golang.org/x/sys v0.26.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/tools v0.23.0 // indirect gonum.org/v1/gonum v0.15.0 // indirect diff --git a/go.sum b/go.sum index 972730dd25a..ce0616b2014 100644 --- a/go.sum +++ b/go.sum @@ -93,8 +93,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a h1:HbKu58rmZp github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc= github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M= github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88= -github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/apache/thrift v0.20.0 
h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= @@ -566,8 +566,8 @@ github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4d github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -616,8 +616,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= 
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= @@ -731,8 +731,8 @@ github.com/openzipkin/zipkin-go v0.4.3 h1:9EGwpqkgnwdEIJ+Od7QVSEIH+ocmm5nPat0G7s github.com/openzipkin/zipkin-go v0.4.3/go.mod h1:M9wCJZFWCo2RiY+o1eBCEMe0Dp2S5LDHcMZmk3RmK7c= github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI= github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c= -github.com/parquet-go/parquet-go v0.23.0 h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk= -github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= @@ -825,8 +825,6 @@ github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29 h1:BkTk4gynLjguayxrYxZoMZjBnA github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8= -github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI= github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e h1:uO75wNGioszjmIzcY/tvdDYKRLVvzggtAmmJkn9j4GQ= github.com/segmentio/fasthash 
v0.0.0-20180216231524-a72b379d632e/go.mod h1:tm/wZFQ8e24NYaBGIlnO2WGCAi67re4HHuOm0sftE/M= github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY= @@ -906,6 +904,8 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -1019,8 +1019,8 @@ go.opentelemetry.io/contrib/propagators/b3 v1.27.0 h1:IjgxbomVrV9za6bRi8fWCNXENs go.opentelemetry.io/contrib/propagators/b3 v1.27.0/go.mod h1:Dv9obQz25lCisDvvs4dy28UPh974CxkahRDUPsY7y9E= go.opentelemetry.io/contrib/zpages v0.52.0 h1:MPgkMy0Cp3O5EdfVXP0ss3ujhEibysTM4eszx7E7d+E= go.opentelemetry.io/contrib/zpages v0.52.0/go.mod h1:fqG5AFdoYru3A3DnhibVuaaEfQV2WKxE7fYE1jgDRwk= -go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= -go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= go.opentelemetry.io/otel/bridge/opencensus v1.27.0 h1:ao9aGGHd+G4YfjBpGs6vbkvt5hoC67STlJA9fCnOAcs= go.opentelemetry.io/otel/bridge/opencensus v1.27.0/go.mod h1:uRvWtAAXzyVOST0WMPX5JHGBaAvBws+2F8PcC5gMnTk= go.opentelemetry.io/otel/bridge/opentracing v1.26.0 
h1:Q/dHj0DOhfLMAs5u5ucAbC7gy66x9xxsZRLpHCJ4XhI= @@ -1049,16 +1049,16 @@ go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.28.0 h1:EVSnY9JbEEW92bE go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.28.0/go.mod h1:Ea1N1QQryNXpCD0I1fdLibBAIpQuBkznMmkdKrapk1Y= go.opentelemetry.io/otel/log v0.4.0 h1:/vZ+3Utqh18e8TPjuc3ecg284078KWrR8BRz+PQAj3o= go.opentelemetry.io/otel/log v0.4.0/go.mod h1:DhGnQvky7pHy82MIRV43iXh3FlKN8UUKftn0KbLOq6I= -go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= -go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= -go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= -go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk= +go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0= go.opentelemetry.io/otel/sdk/log v0.4.0 h1:1mMI22L82zLqf6KtkjrRy5BbagOTWdJsqMY/HSqILAA= go.opentelemetry.io/otel/sdk/log v0.4.0/go.mod h1:AYJ9FVF0hNOgAVzUG/ybg/QttnXhUePWAupmCqtdESo= go.opentelemetry.io/otel/sdk/metric v1.28.0 h1:OkuaKgKrgAbYrrY0t92c+cC+2F6hsFNnCQArXCKlg08= go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg= -go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= -go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= 
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -1258,8 +1258,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= diff --git a/integration/e2e/api_test.go b/integration/e2e/api_test.go index 5fc85189bb0..7633f0e74bd 100644 --- a/integration/e2e/api_test.go +++ b/integration/e2e/api_test.go @@ -63,14 +63,14 @@ func TestSearchTagsV2(t *testing.T) { name string query string scope string - expected tempopb.SearchTagsV2Response + expected searchTagsV2Response }{ { name: "no filtering", query: "", scope: "none", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "span", Tags: []string{firstBatch.SpanAttr, secondBatch.SpanAttr}, @@ -86,8 +86,8 @@ func TestSearchTagsV2(t *testing.T) { name: "first batch - resource", query: fmt.Sprintf(`{ name="%s" }`, firstBatch.name), scope: "resource", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: 
searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "resource", Tags: []string{firstBatch.resourceAttr, "service.name"}, @@ -99,8 +99,8 @@ func TestSearchTagsV2(t *testing.T) { name: "second batch with incomplete query - span", query: fmt.Sprintf(`{ name="%s" && span.x = }`, secondBatch.name), scope: "span", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "span", Tags: []string{secondBatch.SpanAttr}, @@ -112,8 +112,8 @@ func TestSearchTagsV2(t *testing.T) { name: "first batch - resource att - span", query: fmt.Sprintf(`{ resource.%s="%s" }`, firstBatch.resourceAttr, firstBatch.resourceAttVal), scope: "span", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "span", Tags: []string{firstBatch.SpanAttr}, @@ -125,8 +125,8 @@ func TestSearchTagsV2(t *testing.T) { name: "first batch - resource att - resource", query: fmt.Sprintf(`{ resource.%s="%s" }`, firstBatch.resourceAttr, firstBatch.resourceAttVal), scope: "resource", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "resource", Tags: []string{firstBatch.resourceAttr, "service.name"}, @@ -138,8 +138,8 @@ func TestSearchTagsV2(t *testing.T) { name: "second batch - resource attribute - span", query: fmt.Sprintf(`{ resource.%s="%s" }`, secondBatch.resourceAttr, secondBatch.resourceAttVal), scope: "span", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "span", Tags: []string{secondBatch.SpanAttr}, @@ -151,8 +151,8 @@ func TestSearchTagsV2(t *testing.T) { name: "too restrictive query", query: fmt.Sprintf(`{ resource.%s="%s" && resource.y="%s" }`, firstBatch.resourceAttr, firstBatch.resourceAttVal, secondBatch.resourceAttVal), 
scope: "none", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "resource", Tags: []string{"service.name"}, // well known column so included @@ -165,8 +165,8 @@ func TestSearchTagsV2(t *testing.T) { name: "unscoped span attribute", query: fmt.Sprintf(`{ .x="%s" }`, firstBatch.spanAttVal), scope: "none", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "span", Tags: []string{firstBatch.SpanAttr, secondBatch.SpanAttr}, @@ -182,8 +182,8 @@ func TestSearchTagsV2(t *testing.T) { name: "unscoped res attribute", query: fmt.Sprintf(`{ .xx="%s" }`, firstBatch.resourceAttVal), scope: "none", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "span", Tags: []string{firstBatch.SpanAttr, secondBatch.SpanAttr}, @@ -199,8 +199,8 @@ func TestSearchTagsV2(t *testing.T) { name: "both batches - name and resource attribute", query: `{ resource.service.name="my-service"}`, scope: "none", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "span", Tags: []string{firstBatch.SpanAttr, secondBatch.SpanAttr}, @@ -216,8 +216,8 @@ func TestSearchTagsV2(t *testing.T) { name: "bad query - unfiltered results", query: fmt.Sprintf("%s = bar", spanX), // bad query, missing quotes scope: "none", - expected: tempopb.SearchTagsV2Response{ - Scopes: []*tempopb.SearchTagsV2Scope{ + expected: searchTagsV2Response{ + Scopes: []ScopedTags{ { Name: "span", Tags: []string{firstBatch.SpanAttr, secondBatch.SpanAttr}, @@ -251,7 +251,7 @@ func TestSearchTagsV2(t *testing.T) { // Assert no more on the ingester for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - callSearchTagsV2AndAssert(t, tempo, tc.scope, 
tc.query, tempopb.SearchTagsV2Response{}, 0, 0) + callSearchTagsV2AndAssert(t, tempo, tc.scope, tc.query, searchTagsV2Response{}, 0, 0) }) } @@ -443,7 +443,6 @@ func TestSearchTagValuesV2(t *testing.T) { } } -// todo: add search tags v2 func TestSearchTags(t *testing.T) { s, err := e2e.NewScenario("tempo_e2e_tags") require.NoError(t, err) @@ -592,7 +591,8 @@ func callSearchTagValuesV2AndAssert(t *testing.T, svc *e2e.HTTPService, tagName, var response searchTagValuesV2Response require.NoError(t, json.Unmarshal(body, &response)) sort.Slice(response.TagValues, func(i, j int) bool { return response.TagValues[i].Value < response.TagValues[j].Value }) - require.Equal(t, expected, response) + require.Equal(t, expected.TagValues, response.TagValues) + assertMetrics(t, response.Metrics, len(expected.TagValues)) // streaming grpcReq := &tempopb.SearchTagValuesRequest{ @@ -624,15 +624,19 @@ func callSearchTagValuesV2AndAssert(t *testing.T, svc *e2e.HTTPService, tagName, actualGrpcResp.TagValues = append(actualGrpcResp.TagValues, TagValue{Type: tagValue.Type, Value: tagValue.Value}) } sort.Slice(actualGrpcResp.TagValues, func(i, j int) bool { return grpcResp.TagValues[i].Value < grpcResp.TagValues[j].Value }) - require.Equal(t, expected, actualGrpcResp) + require.Equal(t, expected.TagValues, actualGrpcResp.TagValues) + // assert metrics, and make sure it's non-zero when response is non-empty + if len(grpcResp.TagValues) > 0 { + require.Greater(t, grpcResp.Metrics.InspectedBytes, uint64(100)) + } } -func callSearchTagsV2AndAssert(t *testing.T, svc *e2e.HTTPService, scope, query string, expected tempopb.SearchTagsV2Response, start, end int64) { +func callSearchTagsV2AndAssert(t *testing.T, svc *e2e.HTTPService, scope, query string, expected searchTagsV2Response, start, end int64) { urlPath := fmt.Sprintf(`/api/v2/search/tags?scope=%s&q=%s`, scope, url.QueryEscape(query)) // expected will not have the intrinsic scope since it's the same every time, add it here. 
if scope == "none" || scope == "" || scope == "intrinsic" { - expected.Scopes = append(expected.Scopes, &tempopb.SearchTagsV2Scope{ + expected.Scopes = append(expected.Scopes, ScopedTags{ Name: "intrinsic", Tags: []string{"duration", "event:name", "event:timeSinceStart", "instrumentation:name", "instrumentation:version", "kind", "name", "rootName", "rootServiceName", "span:duration", "span:kind", "span:name", "span:status", "span:statusMessage", "status", "statusMessage", "trace:duration", "trace:rootName", "trace:rootService", "traceDuration"}, }) @@ -668,11 +672,12 @@ func callSearchTagsV2AndAssert(t *testing.T, svc *e2e.HTTPService, scope, query defer res.Body.Close() // parse response - var response tempopb.SearchTagsV2Response + var response searchTagsV2Response require.NoError(t, json.Unmarshal(body, &response)) prepTagsResponse(&response) - require.Equal(t, expected, response) + require.Equal(t, expected.Scopes, response.Scopes) + assertMetrics(t, response.Metrics, lenWithoutIntrinsic(response)) // streaming grpcReq := &tempopb.SearchTagsRequest{ @@ -699,12 +704,17 @@ func callSearchTagsV2AndAssert(t *testing.T, svc *e2e.HTTPService, scope, query require.NoError(t, err) } require.NotNil(t, grpcResp) + require.NotNil(t, grpcResp.Metrics) prepTagsResponse(&response) - require.Equal(t, expected, response) + require.Equal(t, expected.Scopes, response.Scopes) + // assert metrics, and make sure it's non-zero when response is non-empty + if lenWithoutIntrinsic(response) > 0 { + require.Greater(t, grpcResp.Metrics.InspectedBytes, uint64(100)) + } } -func prepTagsResponse(resp *tempopb.SearchTagsV2Response) { +func prepTagsResponse(resp *searchTagsV2Response) { if len(resp.Scopes) == 0 { resp.Scopes = nil } @@ -750,7 +760,8 @@ func callSearchTagsAndAssert(t *testing.T, svc *e2e.HTTPService, expected search require.NoError(t, json.Unmarshal(body, &response)) sort.Strings(response.TagNames) sort.Strings(expected.TagNames) - require.Equal(t, expected, response) + 
require.Equal(t, expected.TagNames, response.TagNames) + assertMetrics(t, response.Metrics, len(response.TagNames)) // streaming grpcReq := &tempopb.SearchTagsRequest{ @@ -781,6 +792,10 @@ func callSearchTagsAndAssert(t *testing.T, svc *e2e.HTTPService, expected search } sort.Slice(grpcResp.TagNames, func(i, j int) bool { return grpcResp.TagNames[i] < grpcResp.TagNames[j] }) require.Equal(t, expected.TagNames, grpcResp.TagNames) + // assert metrics, and make sure it's non-zero when response is non-empty + if len(grpcResp.TagNames) > 0 { + require.Greater(t, grpcResp.Metrics.InspectedBytes, uint64(100)) + } } func callSearchTagValuesAndAssert(t *testing.T, svc *e2e.HTTPService, tagName string, expected searchTagValuesResponse, start, end int64) { @@ -816,19 +831,68 @@ func callSearchTagValuesAndAssert(t *testing.T, svc *e2e.HTTPService, tagName st sort.Strings(response.TagValues) sort.Strings(expected.TagValues) - require.Equal(t, expected, response) + require.Equal(t, expected.TagValues, response.TagValues) + assertMetrics(t, response.Metrics, len(response.TagValues)) +} + +func assertMetrics(t *testing.T, metrics MetadataMetrics, respLen int) { + // metrics are not present when response is empty, so return + if respLen == 0 { + return + } + + require.NotNil(t, metrics) + require.NotEmpty(t, metrics.InspectedBytes) + inspectedBytes, err := strconv.ParseUint(metrics.InspectedBytes, 10, 64) + require.NoError(t, err) + // if response len is empty, then the inspected bytes should be 0 + // assert metrics, and make sure it's non-zero + require.Greater(t, inspectedBytes, uint64(300)) +} + +type searchTagsV2Response struct { + Scopes []ScopedTags `json:"scopes"` + Metrics MetadataMetrics `json:"metrics"` +} + +func lenWithoutIntrinsic(resp searchTagsV2Response) int { + size := 0 + for _, scope := range resp.Scopes { + // we don't count intrinsics as results for testing + if scope.Name == "intrinsic" { + continue + } + size += len(scope.Tags) + } + return size +} + +type 
ScopedTags struct { + Name string `json:"name"` + Tags []string `json:"tags"` +} + +type MetadataMetrics struct { + InspectedBytes string `json:"inspectedBytes"` // String to match JSON format + TotalJobs string `json:"totalJobs"` + CompletedJobs string `json:"completedJobs"` + TotalBlocks string `json:"totalBlocks"` + TotalBlockBytes string `json:"totalBlockBytes"` } type searchTagValuesV2Response struct { - TagValues []TagValue `json:"tagValues"` + TagValues []TagValue `json:"tagValues"` + Metrics MetadataMetrics `json:"metrics"` } type searchTagValuesResponse struct { - TagValues []string `json:"tagValues"` + TagValues []string `json:"tagValues"` + Metrics MetadataMetrics `json:"metrics"` } type searchTagsResponse struct { - TagNames []string `json:"tagNames"` + TagNames []string `json:"tagNames"` + Metrics MetadataMetrics `json:"metrics"` } type TagValue struct { diff --git a/integration/e2e/config-query-range.yaml b/integration/e2e/config-query-range.yaml index d17f80a77cf..faa5e5be6e0 100644 --- a/integration/e2e/config-query-range.yaml +++ b/integration/e2e/config-query-range.yaml @@ -9,8 +9,6 @@ query_frontend: search: query_backend_after: 0 # setting these both to 0 will force all range searches to hit the backend query_ingesters_until: 0 - metrics: - exemplars: true distributor: receivers: diff --git a/integration/e2e/deployments/microservices_test.go b/integration/e2e/deployments/microservices_test.go index c7eecedb5fd..e5b36db1ef7 100644 --- a/integration/e2e/deployments/microservices_test.go +++ b/integration/e2e/deployments/microservices_test.go @@ -152,7 +152,7 @@ func TestMicroservicesWithKVStores(t *testing.T) { require.NoError(t, i.WaitSumMetrics(e2e.Equals(1), "tempo_ingester_blocks_flushed_total")) } require.NoError(t, tempoQuerier.WaitSumMetrics(e2e.Equals(3), "tempodb_blocklist_length")) - require.NoError(t, tempoQueryFrontend.WaitSumMetrics(e2e.Equals(3), "tempo_query_frontend_queries_total")) + require.NoError(t, 
tempoQueryFrontend.WaitSumMetrics(e2e.Equals(4), "tempo_query_frontend_queries_total")) // query trace - should fetch from backend util.QueryAndAssertTrace(t, apiClient, info) diff --git a/integration/e2e/deployments/single_binary_test.go b/integration/e2e/deployments/single_binary_test.go index 7966b71bca0..587d5e1a2ff 100644 --- a/integration/e2e/deployments/single_binary_test.go +++ b/integration/e2e/deployments/single_binary_test.go @@ -125,7 +125,7 @@ func TestAllInOne(t *testing.T) { // test metrics require.NoError(t, tempo.WaitSumMetrics(e2e.Equals(1), "tempo_ingester_blocks_flushed_total")) require.NoError(t, tempo.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"tempodb_blocklist_length"}, e2e.WaitMissingMetrics)) - require.NoError(t, tempo.WaitSumMetrics(e2e.Equals(3), "tempo_query_frontend_queries_total")) + require.NoError(t, tempo.WaitSumMetrics(e2e.Equals(4), "tempo_query_frontend_queries_total")) matchers := []*labels.Matcher{ { diff --git a/integration/e2e/limits_test.go b/integration/e2e/limits_test.go index 0110dd32cef..2540214ccde 100644 --- a/integration/e2e/limits_test.go +++ b/integration/e2e/limits_test.go @@ -26,6 +26,7 @@ import ( "github.com/grafana/tempo/integration/util" "github.com/grafana/tempo/pkg/httpclient" + "github.com/grafana/tempo/pkg/model/trace" "github.com/grafana/tempo/pkg/tempopb" tempoUtil "github.com/grafana/tempo/pkg/util" "github.com/grafana/tempo/pkg/util/test" @@ -233,22 +234,22 @@ func TestQueryLimits(t *testing.T) { querierClient := httpclient.New("http://"+tempo.Endpoint(3200)+"/querier", tempoUtil.FakeTenantID) _, err = client.QueryTrace(tempoUtil.TraceIDToHexString(traceID[:])) - require.ErrorContains(t, err, "trace exceeds max size") - require.ErrorContains(t, err, "failed with response: 500") // confirm frontend returns 500 + require.ErrorContains(t, err, trace.ErrTraceTooLarge.Error()) + require.ErrorContains(t, err, "failed with response: 422") // confirm frontend returns 422 _, err = 
querierClient.QueryTrace(tempoUtil.TraceIDToHexString(traceID[:])) - require.ErrorContains(t, err, "trace exceeds max size") - require.ErrorContains(t, err, "failed with response: 500") // todo: this should return 400 ideally so the frontend does not retry, but does not currently + require.ErrorContains(t, err, trace.ErrTraceTooLarge.Error()) + require.ErrorContains(t, err, "failed with response: 422") // complete block timeout is 10 seconds time.Sleep(15 * time.Second) _, err = client.QueryTrace(tempoUtil.TraceIDToHexString(traceID[:])) - require.ErrorContains(t, err, "trace exceeds max size") - require.ErrorContains(t, err, "failed with response: 500") // confirm frontend returns 500 + require.ErrorContains(t, err, trace.ErrTraceTooLarge.Error()) + require.ErrorContains(t, err, "failed with response: 422") // confirm frontend returns 422 _, err = querierClient.QueryTrace(tempoUtil.TraceIDToHexString(traceID[:])) - require.ErrorContains(t, err, "trace exceeds max size") - require.ErrorContains(t, err, "failed with response: 400") // confirm querier returns 400 + require.ErrorContains(t, err, trace.ErrTraceTooLarge.Error()) + require.ErrorContains(t, err, "failed with response: 422") // confirm querier returns 422 } func TestLimitsPartialSuccess(t *testing.T) { diff --git a/integration/util/util.go b/integration/util/util.go index d548f59dbdb..6ddd7dc14ab 100644 --- a/integration/util/util.go +++ b/integration/util/util.go @@ -592,6 +592,12 @@ func CallBuildinfo(t *testing.T, svc *e2e.HTTPService) { _, ok := jsonResponse[key] require.True(t, ok) } + + version, ok := jsonResponse["version"].(string) + require.True(t, ok) + semverRegex := `^v?(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$` + require.Regexp(t, semverRegex, version) + defer res.Body.Close() } diff --git a/modules/distributor/config.go b/modules/distributor/config.go index 
d814247a41a..14bb02fa41b 100644 --- a/modules/distributor/config.go +++ b/modules/distributor/config.go @@ -8,6 +8,7 @@ import ( ring_client "github.com/grafana/dskit/ring/client" "github.com/grafana/tempo/modules/distributor/forwarder" + "github.com/grafana/tempo/modules/distributor/usage" "github.com/grafana/tempo/pkg/util" ) @@ -37,8 +38,8 @@ type Config struct { LogReceivedSpans LogSpansConfig `yaml:"log_received_spans,omitempty"` LogDiscardedSpans LogSpansConfig `yaml:"log_discarded_spans,omitempty"` MetricReceivedSpans MetricReceivedSpansConfig `yaml:"metric_received_spans,omitempty"` - - Forwarders forwarder.ConfigList `yaml:"forwarders"` + Forwarders forwarder.ConfigList `yaml:"forwarders"` + Usage usage.Config `yaml:"usage,omitempty"` // disables write extension with inactive ingesters. Use this along with ingester.lifecycler.unregister_on_shutdown = true // note that setting these two config values reduces tolerance to failures on rollout b/c there is always one guaranteed to be failing replica @@ -80,4 +81,6 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet) f.BoolVar(&cfg.LogDiscardedSpans.Enabled, util.PrefixConfig(prefix, "log-discarded-spans.enabled"), false, "Enable to log every discarded span to help debug ingestion or calculate span error distributions using the logs.") f.BoolVar(&cfg.LogDiscardedSpans.IncludeAllAttributes, util.PrefixConfig(prefix, "log-discarded-spans.include-attributes"), false, "Enable to include span attributes in the logs.") f.BoolVar(&cfg.LogDiscardedSpans.FilterByStatusError, util.PrefixConfig(prefix, "log-discarded-spans.filter-by-status-error"), false, "Enable to filter out spans without status error.") + + cfg.Usage.RegisterFlagsAndApplyDefaults(prefix, f) } diff --git a/modules/distributor/distributor.go b/modules/distributor/distributor.go index 02b9a511f6e..87e57e271b5 100644 --- a/modules/distributor/distributor.go +++ b/modules/distributor/distributor.go @@ -5,6 +5,7 @@ import ( 
"encoding/hex" "fmt" "math" + "net/http" "sync" "time" @@ -28,6 +29,7 @@ import ( "github.com/grafana/tempo/modules/distributor/forwarder" "github.com/grafana/tempo/modules/distributor/receiver" + "github.com/grafana/tempo/modules/distributor/usage" generator_client "github.com/grafana/tempo/modules/generator/client" ingester_client "github.com/grafana/tempo/modules/ingester/client" "github.com/grafana/tempo/modules/overrides" @@ -154,6 +156,8 @@ type Distributor struct { subservices *services.Manager subservicesWatcher *services.FailureWatcher + usage *usage.Tracker + logger log.Logger } @@ -214,6 +218,14 @@ func New(cfg Config, clientCfg ingester_client.Config, ingestersRing ring.ReadRi logger: logger, } + if cfg.Usage.CostAttribution.Enabled { + usage, err := usage.NewTracker(cfg.Usage.CostAttribution, "cost-attribution", o.CostAttributionDimensions, o.CostAttributionMaxCardinality) + if err != nil { + return nil, fmt.Errorf("creating usage tracker: %w", err) + } + d.usage = usage + } + var generatorsPoolFactory ring_client.PoolAddrFunc = func(addr string) (ring_client.PoolClient, error) { return generator_client.New(addr, generatorClientCfg) } @@ -328,6 +340,7 @@ func (d *Distributor) PushTraces(ctx context.Context, traces ptrace.Traces) (*te return &tempopb.PushResponse{}, nil } // check limits + // todo - usage tracker include discarded bytes? 
err = d.checkForRateLimits(size, spanCount, userID) if err != nil { return nil, err @@ -360,6 +373,11 @@ func (d *Distributor) PushTraces(ctx context.Context, traces ptrace.Traces) (*te statBytesReceived.Inc(int64(size)) statSpansReceived.Inc(int64(spanCount)) + // Usage tracking + if d.usage != nil { + d.usage.Observe(userID, batches) + } + keys, rebatchedTraces, err := requestsByTraceID(batches, userID, spanCount) if err != nil { overrides.RecordDiscardedSpans(spanCount, reasonInternalError, userID) @@ -498,6 +516,14 @@ func (*Distributor) Check(_ context.Context, _ *grpc_health_v1.HealthCheckReques return &grpc_health_v1.HealthCheckResponse{Status: grpc_health_v1.HealthCheckResponse_SERVING}, nil } +func (d *Distributor) UsageTrackerHandler() http.Handler { + if d.usage != nil { + return d.usage.Handler() + } + + return nil +} + // requestsByTraceID takes an incoming tempodb.PushRequest and creates a set of keys for the hash ring // and traces to pass onto the ingesters. func requestsByTraceID(batches []*v1.ResourceSpans, userID string, spanCount int) ([]uint32, []*rebatchedTrace, error) { diff --git a/modules/distributor/usage/config.go b/modules/distributor/usage/config.go new file mode 100644 index 00000000000..b6496ecd50d --- /dev/null +++ b/modules/distributor/usage/config.go @@ -0,0 +1,30 @@ +package usage + +import ( + "flag" + "time" +) + +const ( + defaultMaxCardinality = uint64(10000) + defaultStaleDuration = 15 * time.Minute + defaultPurgePeriod = time.Minute +) + +type PerTrackerConfig struct { + Enabled bool `yaml:"enabled,omitempty" json:"enabled,omitempty"` + MaxCardinality uint64 `yaml:"max_cardinality,omitempty" json:"max_cardinality,omitempty"` + StaleDuration time.Duration `yaml:"stale_duration,omitempty" json:"stale_duration,omitempty"` +} + +type Config struct { + CostAttribution PerTrackerConfig `yaml:"cost_attribution,omitempty" json:"cost_attribution,omitempty"` +} + +func (c *Config) RegisterFlagsAndApplyDefaults(_ string, _ 
*flag.FlagSet) { + c.CostAttribution = PerTrackerConfig{ + Enabled: false, + MaxCardinality: defaultMaxCardinality, + StaleDuration: defaultStaleDuration, + } +} diff --git a/modules/distributor/usage/tracker.go b/modules/distributor/usage/tracker.go new file mode 100644 index 00000000000..27911b4a008 --- /dev/null +++ b/modules/distributor/usage/tracker.go @@ -0,0 +1,427 @@ +package usage + +import ( + "maps" + "math" + "math/bits" + "net/http" + "slices" + "sync" + "time" + + "github.com/cespare/xxhash/v2" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/prometheus/prometheus/util/strutil" + + v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1" +) + +const ( + tenantLabel = "tenant" + trackerLabel = "tracker" + missingLabel = "__missing__" + overflowLabel = "__overflow__" +) + +type ( + tenantLabelsFunc func(string) map[string]string + tenantMaxFunc func(string) uint64 +) + +type bucket struct { + // Configuration + descr *prometheus.Desc // Configuration can change over time so it is captured with the bucket. + labels []string + + // Runtime data + bytes uint64 + lastUpdated int64 +} + +func (b *bucket) Inc(bytes uint64, unix int64) { + b.bytes += bytes + b.lastUpdated = unix +} + +type mapping struct { + from string + to int // Index into the values buffer +} + +type tenantUsage struct { + series map[uint64]*bucket + constLabels prometheus.Labels + + // Buffers for Observe + dimensions map[string]string // Originally configured dimensions + mapping []mapping // Mapping from attribute => final sanitized label. 
Typically few values and slice is faster than map + sortedKeys []string // So we can always iterate the buffer in order, this can be precomputed up front + buffer1 []string // Batch-level values + buffer2 []string // Span-level values + buffer3 []string // Last hashed values + overflow uint64 +} + +// GetBuffersForDimensions takes advantage of the fact that the configuration for a tracker +// changes slowly. Reuses buffers from the previous call when the dimensions are the same. +func (t *tenantUsage) GetBuffersForDimensions(dimensions map[string]string) ([]mapping, []string, []string, []string) { + if !maps.Equal(dimensions, t.dimensions) { + // The configuration changed. + + // Step 1 + // Gather all configured dimensions and their sanitized output + t.dimensions = dimensions + sanitizedDimensions := make(map[string]string, len(dimensions)) + for k, v := range dimensions { + // Get the final sanitized output label for this + // dimension. Dimensions are key-value pairs with + // optional value. If value is empty string, then + // we use just the key. Regardless, the output + // is always the sanitized version. + // Example: + // service.name="" => "service_name" + // service.name="foo.bar" => "foo_bar" + var sanitized string + if v == "" { + // The dimension is using default mapping + v = k + } + sanitized = strutil.SanitizeFullLabelName(v) + sanitizedDimensions[k] = sanitized + } + + // Step 2 + // Build the final list of sorted/distinct outputs + t.sortedKeys = t.sortedKeys[:0] + for _, v := range sanitizedDimensions { + if !slices.Contains(t.sortedKeys, v) { + t.sortedKeys = append(t.sortedKeys, v) + } + } + slices.Sort(t.sortedKeys) + + // Step 3 + // Prepare the mapping from raw attribute names to the final location of + // where it goes in the output buffers. This avoids another layer of indirection. 
+ t.mapping = t.mapping[:0] + for k := range dimensions { + i := slices.Index(t.sortedKeys, sanitizedDimensions[k]) + t.mapping = append(t.mapping, mapping{ + from: k, + to: i, + }) + } + + // Step 4 + // Prepopulate the buffers and precompute the overflow bucket + t.buffer1 = make([]string, len(t.sortedKeys)) + t.buffer2 = make([]string, len(t.sortedKeys)) + t.buffer3 = make([]string, len(t.sortedKeys)) + for i := range t.sortedKeys { + t.buffer1[i] = overflowLabel + } + t.overflow = hash(t.sortedKeys, t.buffer1) + } + return t.mapping, t.buffer1, t.buffer2, t.buffer3 +} + +// func (t *tenantUsage) getSeries(labels, values []string, maxCardinality uint64) *bucket { +func (t *tenantUsage) getSeries(buffer []string, maxCardinality uint64) *bucket { + h := hash(t.sortedKeys, buffer) + + b := t.series[h] + if b == nil { + // Before creating a new series, check for cardinality limit. + if uint64(len(t.series)) >= maxCardinality { + // Overflow + // This tenant is at the maximum number of series. In this case all data + // goes into the final overflow bucket. It has the same dimensions as the + // current configuration, except every label is overridden to the special overflow value. + for k := range buffer { + buffer[k] = overflowLabel + } + h = t.overflow + b = t.series[h] + } + } + + if b == nil { + // First encounter with this series. Initialize it. 
+ // Detach a copy of the values + v := make([]string, len(buffer)) + copy(v, buffer) + b = &bucket{ + // Metric description - constant for this pass now that the dimensions are known + descr: prometheus.NewDesc("tempo_usage_tracker_bytes_received_total", "bytes total received with these attributes", t.sortedKeys, t.constLabels), + labels: v, + } + t.series[h] = b + } + return b +} + +type Tracker struct { + mtx sync.Mutex + name string + tenants map[string]*tenantUsage + labelsFn tenantLabelsFunc + maxFn tenantMaxFunc + reg *prometheus.Registry + cfg PerTrackerConfig +} + +func NewTracker(cfg PerTrackerConfig, name string, labelsFn tenantLabelsFunc, maxFn tenantMaxFunc) (*Tracker, error) { + u := &Tracker{ + cfg: cfg, + name: name, + tenants: make(map[string]*tenantUsage), + labelsFn: labelsFn, + maxFn: maxFn, + reg: prometheus.NewRegistry(), + } + + err := u.reg.Register(u) + if err != nil { + return nil, err + } + + go u.PurgeRoutine() + + return u, nil +} + +// getTenant must be called under lock. +func (u *Tracker) getTenant(tenant string) *tenantUsage { + data := u.tenants[tenant] + if data == nil { + data = &tenantUsage{ + series: make(map[uint64]*bucket), + constLabels: prometheus.Labels{ + tenantLabel: tenant, + trackerLabel: u.name, + }, + } + u.tenants[tenant] = data + } + return data +} + +func (u *Tracker) Observe(tenant string, batches []*v1.ResourceSpans) { + dimensions := u.labelsFn(tenant) + if len(dimensions) == 0 { + // Not configured + // TODO - Should we put it all in the unattributed bucket instead? 
+ return + } + + max := u.maxFn(tenant) + if max == 0 { + max = u.cfg.MaxCardinality + } + + u.mtx.Lock() + defer u.mtx.Unlock() + + var ( + now = time.Now().Unix() + data = u.getTenant(tenant) + mapping, buffer1, buffer2, last = data.GetBuffersForDimensions(dimensions) + ) + + for _, batch := range batches { + unaccountedForBatchData, totalSpanCount := nonSpanDataLength(batch) + + if totalSpanCount == 0 { + // Mainly to prevent a panic below, but is this even possible? + continue + } + + // This is 1/Nth of the unaccounted for batch data that gets added to each span. + // Adding this incrementally as we go through the spans is the fastest method, but + // loses some precision. The other (original) implementation is to record span counts + // per series into a map and reconcile at the end. That method has more accurate data because + // it performs the floating point math once on the total, instead of accumulating 1/N + 1/N ... errors. + batchPortion := int(math.RoundToEven(float64(unaccountedForBatchData) / float64(totalSpanCount))) + + // To account for the accumulated error we dump the remaining delta onto the first span, which can be negative. + // The result ensures the total recorded bytes matches the input. + firstSpanPortion := unaccountedForBatchData - batchPortion*totalSpanCount + + // Reset value buffer for every batch. 
+ for k := range buffer1 { + buffer1[k] = missingLabel + } + + if batch.Resource != nil { + for _, m := range mapping { + for _, a := range batch.Resource.Attributes { + v := a.Value.GetStringValue() + if v == "" { + continue + } + if a.Key == m.from { + buffer1[m.to] = v + break + } + } + } + } + + var bucket *bucket + + for i, ss := range batch.ScopeSpans { + for j, s := range ss.Spans { + sz := s.Size() + sz += protoLengthMath(sz) + sz += batchPortion // Incrementally add 1/Nth worth of the unaccounted for batch data + if i == 0 && j == 0 { + sz += firstSpanPortion + } + + // Reset to batch values to account for some spans having missing values. + copy(buffer2, buffer1) + + for _, m := range mapping { + for _, a := range s.Attributes { + v := a.Value.GetStringValue() + if v == "" { + continue + } + if a.Key == m.from { + buffer2[m.to] = v + break + } + } + } + + // Every span can be a different series. + // If the values buffer hasn't changed then we + // know it's the same bucket and avoid hashing again. + // This shows up in 2 common cases: + // - Dimensions are only resource attributes + // - Runs of spans with the same attributes + // NOTE - Not happy about the slices.Equal to detect when + // to rehash, but couldn't figure out a better way for now. + // The difficulty is tracking bucket dirty status while + // resetting to batch values and recording the span values. 
+ if bucket == nil || !slices.Equal(buffer2, last) { + bucket = data.getSeries(buffer2, max) + copy(last, buffer2) + } + bucket.Inc(uint64(sz), now) + } + } + } +} + +func (u *Tracker) PurgeRoutine() { + purge := time.NewTicker(defaultPurgePeriod) + for range purge.C { + u.purge() + } +} + +func (u *Tracker) purge() { + u.mtx.Lock() + defer u.mtx.Unlock() + + stale := time.Now().Add(-u.cfg.StaleDuration).Unix() + + for t, data := range u.tenants { + for h, s := range data.series { + if s.lastUpdated <= stale { + delete(data.series, h) + } + } + + if len(data.series) == 0 { + // Remove empty tenant + delete(u.tenants, t) + } + } +} + +func (u *Tracker) Handler() http.Handler { + return promhttp.HandlerFor(u.reg, promhttp.HandlerOpts{}) +} + +func (u *Tracker) Describe(chan<- *prometheus.Desc) { + // This runs on startup when registering the tracker. Therefore + // we will have nothing to describe, but it's also not required. +} + +func (u *Tracker) Collect(ch chan<- prometheus.Metric) { + u.mtx.Lock() + defer u.mtx.Unlock() + + for _, t := range u.tenants { + for _, b := range t.series { + ch <- prometheus.MustNewConstMetric(b.descr, prometheus.CounterValue, float64(b.bytes), b.labels...) + } + } +} + +var _ prometheus.Collector = (*Tracker)(nil) + +// hash the given key-value pairs buffers. Buffers must have the +// same lengths +func hash(keys []string, values []string) uint64 { + h := xxhash.New() + + for i := range keys { + _, _ = h.WriteString(keys[i]) + _, _ = h.Write([]byte{255}) + _, _ = h.WriteString(values[i]) + _, _ = h.Write([]byte{255}) + } + + return h.Sum64() +} + +// nonSpanDataLength returns the number of proto bytes in the batch +// that aren't attributable to specific spans. It's complicated but much faster +// to do this because it ensures we only measure each part of the proto once. +// The first (and simpler) approach was to call batch.Size() and then subtract +// each encountered span. 
But this measures spans twice, which is already the slowest +// part by far. Hopefully isn't too brittle. It must be updated for new fields above the +// span level. Also returns the count of spans while we're here so we don't have to loop again. +func nonSpanDataLength(batch *v1.ResourceSpans) (int, int) { + total := 0 + spans := 0 + + if batch.Resource != nil { + sz := batch.Resource.Size() + total += sz + protoLengthMath(sz) + } + + l := len(batch.SchemaUrl) + if l > 0 { + total += l + protoLengthMath(l) + } + + for _, ss := range batch.ScopeSpans { + // This is the data to store the presence of this ss + total += protoLengthMath(1) + + l = len(ss.SchemaUrl) + if l > 0 { + total += l + protoLengthMath(l) + } + + if ss.Scope != nil { + sz := ss.Scope.Size() + total += sz + protoLengthMath(sz) + } + + spans += len(ss.Spans) + } + + return total, spans +} + +// Bookkeeping data to encode a length in proto. +// Copied from sovTrace in .pb.go +func protoLengthMath(x int) (n int) { + return 1 + (bits.Len64(uint64(x)|1)+6)/7 +} diff --git a/modules/distributor/usage/tracker_test.go b/modules/distributor/usage/tracker_test.go new file mode 100644 index 00000000000..0d95c783f76 --- /dev/null +++ b/modules/distributor/usage/tracker_test.go @@ -0,0 +1,299 @@ +package usage + +import ( + "math" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" + + v1common "github.com/grafana/tempo/pkg/tempopb/common/v1" + v1resource "github.com/grafana/tempo/pkg/tempopb/resource/v1" + v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1" + "github.com/grafana/tempo/pkg/util/test" +) + +func testConfig() PerTrackerConfig { + return PerTrackerConfig{ + Enabled: true, + MaxCardinality: defaultMaxCardinality, + StaleDuration: defaultStaleDuration, + } +} + +func TestUsageTracker(t *testing.T) { + type testcase struct { + name string + max int + dimensions map[string]string + expected map[uint64]*bucket + } + + // Reused for all test cases + data := 
[]*v1.ResourceSpans{ + { + Resource: &v1resource.Resource{ + Attributes: []*v1common.KeyValue{ + { + Key: "service.name", + Value: &v1common.AnyValue{Value: &v1common.AnyValue_StringValue{StringValue: "svc"}}, + }, + }, + }, + ScopeSpans: []*v1.ScopeSpans{ + { + Spans: []*v1.Span{ + { + Attributes: []*v1common.KeyValue{ + {Key: "attr", Value: &v1common.AnyValue{Value: &v1common.AnyValue_StringValue{StringValue: "1"}}}, + {Key: "attr2", Value: &v1common.AnyValue{Value: &v1common.AnyValue_StringValue{StringValue: "attr2Value"}}}, + }, + }, + { + Attributes: []*v1common.KeyValue{ + {Key: "attr", Value: &v1common.AnyValue{Value: &v1common.AnyValue_StringValue{StringValue: "1"}}}, + }, + }, + { + Attributes: []*v1common.KeyValue{ + {Key: "attr", Value: &v1common.AnyValue{Value: &v1common.AnyValue_StringValue{StringValue: "2"}}}, + }, + }, + { + Attributes: []*v1common.KeyValue{ + {Key: "attr", Value: &v1common.AnyValue{Value: &v1common.AnyValue_StringValue{StringValue: "1"}}}, + }, + }, + }, + SchemaUrl: "test", + }, + }, + SchemaUrl: "test", + }, + } + nonSpanSize, _ := nonSpanDataLength(data[0]) + + // Helper functions for dividing up data sizes + nonSpanRatio := func(r float64) uint64 { + return uint64(math.RoundToEven(float64(nonSpanSize) * r)) + } + + spanSize := func(i int) uint64 { + sz := data[0].ScopeSpans[0].Spans[i].Size() + sz += protoLengthMath(sz) + return uint64(sz) + } + + var ( + testCases []testcase + name string + dimensions map[string]string + expected map[uint64]*bucket + ) + + // ------------------------------------------------------------- + // Test case 1 - Group by service.name, entire batch is 1 series + // ------------------------------------------------------------- + name = "standard" + dimensions = map[string]string{"service.name": ""} + expected = make(map[uint64]*bucket) + expected[hash([]string{"service_name"}, []string{"svc"})] = &bucket{ + labels: []string{"svc"}, + bytes: uint64(data[0].Size()), // The entire batch is included, with 
the exact number of bytes + } + testCases = append(testCases, testcase{ + name: name, + dimensions: dimensions, + expected: expected, + }) + + // ------------------------------------------------------------- + // Test case 2 - Group by attr, batch is split 75%/25% + // ------------------------------------------------------------- + name = "splitbatch" + dimensions = map[string]string{"attr": ""} + expected = make(map[uint64]*bucket) + expected[hash([]string{"attr"}, []string{"1"})] = &bucket{ + labels: []string{"1"}, + bytes: nonSpanRatio(0.75) + spanSize(0) + spanSize(1) + spanSize(3), + } + expected[hash([]string{"attr"}, []string{"2"})] = &bucket{ + labels: []string{"2"}, + bytes: nonSpanRatio(0.25) + spanSize(2), + } + testCases = append(testCases, testcase{ + name: name, + dimensions: dimensions, + expected: expected, + }) + + // ------------------------------------------------------------- + // Test case 3 - Missing labels are set to __missing__ + // ------------------------------------------------------------- + name = "missing" + dimensions = map[string]string{"foo": ""} + expected = make(map[uint64]*bucket) + expected[hash([]string{"foo"}, []string{missingLabel})] = &bucket{ + labels: []string{missingLabel}, // No spans have "foo" so it is assigned to the missingvalue + bytes: uint64(data[0].Size()), + } + testCases = append(testCases, testcase{ + name: name, + dimensions: dimensions, + expected: expected, + }) + + // ------------------------------------------------------------- + // Test case 4 - Max cardinality + // ------------------------------------------------------------- + name = "maxcardinality" + dimensions = map[string]string{"attr": ""} + expected = make(map[uint64]*bucket) + expected[hash([]string{"attr"}, []string{"1"})] = &bucket{ + labels: []string{"1"}, + bytes: nonSpanRatio(0.75) + spanSize(0) + spanSize(1) + spanSize(3), // attr=1 is encountered first and recorded, with 75% of spans + } + expected[hash([]string{"attr"}, 
[]string{overflowLabel})] = &bucket{ + labels: []string{overflowLabel}, + bytes: nonSpanRatio(0.25) + spanSize(2), // attr=2 doesn't fit within cardinality and those 25% of spans go into the overflow series. + } + testCases = append(testCases, testcase{ + name: name, + max: 1, + dimensions: dimensions, + expected: expected, + }) + + // ------------------------------------------------------------- + // Test case 5 - Multiple labels with rename + // Multiple dimensions are renamed into the same output label + // ------------------------------------------------------------- + name = "rename" + dimensions = map[string]string{ + "service.name": "foo", + "attr": "foo", + } + expected = make(map[uint64]*bucket) + expected[hash([]string{"foo"}, []string{"1"})] = &bucket{ + labels: []string{"1"}, + bytes: nonSpanRatio(0.75) + spanSize(0) + spanSize(1) + spanSize(3), + } + expected[hash([]string{"foo"}, []string{"2"})] = &bucket{ + labels: []string{"2"}, + bytes: nonSpanRatio(0.25) + spanSize(2), + } + testCases = append(testCases, testcase{ + name: name, + dimensions: dimensions, + expected: expected, + }) + + // ------------------------------------------------------------- + // Test case 6 - Some spans missing value + // Some spans within the same batch are missing values and + // should continue to inherit the batch value + // ------------------------------------------------------------- + name = "partially_missing" + dimensions = map[string]string{ + "attr2": "", + } + expected = make(map[uint64]*bucket) + expected[hash([]string{"attr2"}, []string{"attr2Value"})] = &bucket{ + labels: []string{"attr2Value"}, + bytes: nonSpanRatio(0.25) + spanSize(0), + } + expected[hash([]string{"attr2"}, []string{missingLabel})] = &bucket{ + labels: []string{missingLabel}, + bytes: nonSpanRatio(0.75) + spanSize(1) + spanSize(2) + spanSize(3), + } + testCases = append(testCases, testcase{ + name: name, + dimensions: dimensions, + expected: expected, + }) + + for _, tc := range testCases { 
+ t.Run(tc.name, func(t *testing.T) { + cfg := testConfig() + if tc.max > 0 { + cfg.MaxCardinality = uint64(tc.max) + } + + u, err := NewTracker(cfg, "test", func(_ string) map[string]string { return tc.dimensions }, func(_ string) uint64 { return 0 }) + require.NoError(t, err) + + u.Observe("test", data) + actual := u.tenants["test"].series + + require.Equal(t, len(tc.expected), len(actual)) + + // Ensure total bytes recorded exactly matches the batch + total := 0 + for _, b := range actual { + total += int(b.bytes) + } + require.Equal(t, data[0].Size(), total, "total") + + for expectedHash, expectedBucket := range tc.expected { + require.Equal(t, expectedBucket.labels, actual[expectedHash].labels) + // To make testing less brittle from rounding, just ensure that each series + // is within 1 byte of expected. We already ensured the total is 100% accurate above. + require.InDelta(t, expectedBucket.bytes, actual[expectedHash].bytes, 1.0) + } + }) + } +} + +func BenchmarkUsageTrackerObserve(b *testing.B) { + var ( + tr = test.MakeTrace(10, nil) + dims = map[string]string{"service.name": "service_name"} + // dims = map[string]string{"key": ""} // To benchmark span-level attribute + labelsFn = func(_ string) map[string]string { return dims } // Allocation outside the function to not influence benchmark + maxFn = func(_ string) uint64 { return 0 } + ) + + u, err := NewTracker(testConfig(), "test", labelsFn, maxFn) + require.NoError(b, err) + + for i := 0; i < b.N; i++ { + u.Observe("test", tr.ResourceSpans) + } +} + +func BenchmarkUsageTrackerCollect(b *testing.B) { + var ( + tr = test.MakeTrace(10, nil) + dims = map[string]string{"service.name": ""} + labelsFn = func(_ string) map[string]string { return dims } // Allocation outside the function to not influence benchmark + maxFn = func(_ string) uint64 { return 0 } + req = httptest.NewRequest("", "/", nil) + resp = &NoopHTTPResponseWriter{} + ) + + u, err := NewTracker(testConfig(), "test", labelsFn, maxFn) + 
require.NoError(b, err) + + u.Observe("test", tr.ResourceSpans) + + handler := u.Handler() + for i := 0; i < b.N; i++ { + handler.ServeHTTP(resp, req) + } +} + +type NoopHTTPResponseWriter struct { + headers map[string][]string +} + +var _ http.ResponseWriter = (*NoopHTTPResponseWriter)(nil) + +func (n *NoopHTTPResponseWriter) Header() http.Header { + if n.headers == nil { + n.headers = make(map[string][]string) + } + return n.headers +} +func (NoopHTTPResponseWriter) Write(buf []byte) (int, error) { return len(buf), nil } +func (NoopHTTPResponseWriter) WriteHeader(_ int) {} diff --git a/modules/frontend/combiner/search_tag_values.go b/modules/frontend/combiner/search_tag_values.go index 29d8ec4163f..253033dec33 100644 --- a/modules/frontend/combiner/search_tag_values.go +++ b/modules/frontend/combiner/search_tag_values.go @@ -4,6 +4,7 @@ import ( "github.com/grafana/tempo/pkg/api" "github.com/grafana/tempo/pkg/collector" "github.com/grafana/tempo/pkg/tempopb" + "go.uber.org/atomic" ) var ( @@ -13,7 +14,8 @@ var ( func NewSearchTagValues(limitBytes int) Combiner { // Distinct collector with no limit - d := collector.NewDistinctString(limitBytes) + d := collector.NewDistinctStringWithDiff(limitBytes) + inspectedBytes := atomic.NewUint64(0) c := &genericCombiner[*tempopb.SearchTagValuesResponse]{ httpStatusCode: 200, @@ -23,17 +25,30 @@ func NewSearchTagValues(limitBytes int) Combiner { for _, v := range partial.TagValues { d.Collect(v) } + if partial.Metrics != nil { + inspectedBytes.Add(partial.Metrics.InspectedBytes) + } return nil }, finalize: func(final *tempopb.SearchTagValuesResponse) (*tempopb.SearchTagValuesResponse, error) { final.TagValues = d.Strings() + // return metrics in final response + // TODO: merge with other metrics as well, when we have them, return only InspectedBytes for now + final.Metrics = &tempopb.MetadataMetrics{InspectedBytes: inspectedBytes.Load()} return final, nil }, quit: func(_ *tempopb.SearchTagValuesResponse) bool { return 
d.Exceeded() }, diff: func(response *tempopb.SearchTagValuesResponse) (*tempopb.SearchTagValuesResponse, error) { - response.TagValues = d.Diff() + resp, err := d.Diff() + if err != nil { + return nil, err + } + response.TagValues = resp + // also return latest metrics along with diff + // TODO: merge with other metrics as well, when we have them, return only InspectedBytes for now + response.Metrics = &tempopb.MetadataMetrics{InspectedBytes: inspectedBytes.Load()} return response, nil }, } @@ -48,6 +63,7 @@ func NewTypedSearchTagValues(limitBytes int) GRPCCombiner[*tempopb.SearchTagValu func NewSearchTagValuesV2(limitBytes int) Combiner { // Distinct collector with no limit and diff enabled d := collector.NewDistinctValueWithDiff(limitBytes, func(tv tempopb.TagValue) int { return len(tv.Type) + len(tv.Value) }) + inspectedBytes := atomic.NewUint64(0) c := &genericCombiner[*tempopb.SearchTagValuesV2Response]{ httpStatusCode: 200, @@ -57,6 +73,9 @@ func NewSearchTagValuesV2(limitBytes int) Combiner { for _, v := range partial.TagValues { d.Collect(*v) } + if partial.Metrics != nil { + inspectedBytes.Add(partial.Metrics.InspectedBytes) + } return nil }, finalize: func(final *tempopb.SearchTagValuesV2Response) (*tempopb.SearchTagValuesV2Response, error) { @@ -66,18 +85,27 @@ func NewSearchTagValuesV2(limitBytes int) Combiner { v2 := v final.TagValues = append(final.TagValues, &v2) } + // load Inspected Bytes here and return along with final response + // TODO: merge with other metrics as well, when we have them, return only InspectedBytes for now + final.Metrics = &tempopb.MetadataMetrics{InspectedBytes: inspectedBytes.Load()} return final, nil }, quit: func(_ *tempopb.SearchTagValuesV2Response) bool { return d.Exceeded() }, diff: func(response *tempopb.SearchTagValuesV2Response) (*tempopb.SearchTagValuesV2Response, error) { - diff := d.Diff() + diff, err := d.Diff() + if err != nil { + return nil, err + } response.TagValues = make([]*tempopb.TagValue, 0, len(diff)) 
for _, v := range diff { v2 := v response.TagValues = append(response.TagValues, &v2) } + // also return metrics along with diffs + // TODO: merge with other metrics as well, when we have them, return only InspectedBytes for now + response.Metrics = &tempopb.MetadataMetrics{InspectedBytes: inspectedBytes.Load()} return response, nil }, } diff --git a/modules/frontend/combiner/search_tags.go b/modules/frontend/combiner/search_tags.go index 47af9b5a1e6..224a5706b1a 100644 --- a/modules/frontend/combiner/search_tags.go +++ b/modules/frontend/combiner/search_tags.go @@ -4,6 +4,7 @@ import ( "github.com/grafana/tempo/pkg/api" "github.com/grafana/tempo/pkg/collector" "github.com/grafana/tempo/pkg/tempopb" + "go.uber.org/atomic" ) var ( @@ -12,7 +13,8 @@ var ( ) func NewSearchTags(limitBytes int) Combiner { - d := collector.NewDistinctString(limitBytes) + d := collector.NewDistinctStringWithDiff(limitBytes) + inspectedBytes := atomic.NewUint64(0) c := &genericCombiner[*tempopb.SearchTagsResponse]{ httpStatusCode: 200, @@ -22,17 +24,31 @@ func NewSearchTags(limitBytes int) Combiner { for _, v := range partial.TagNames { d.Collect(v) } + if partial.Metrics != nil { + inspectedBytes.Add(partial.Metrics.InspectedBytes) + } return nil }, finalize: func(response *tempopb.SearchTagsResponse) (*tempopb.SearchTagsResponse, error) { response.TagNames = d.Strings() + // return metrics with final results + // TODO: merge with other metrics as well, when we have them, return only InspectedBytes for now + response.Metrics = &tempopb.MetadataMetrics{InspectedBytes: inspectedBytes.Load()} return response, nil }, quit: func(_ *tempopb.SearchTagsResponse) bool { return d.Exceeded() }, diff: func(response *tempopb.SearchTagsResponse) (*tempopb.SearchTagsResponse, error) { - response.TagNames = d.Diff() + resp, err := d.Diff() + if err != nil { + return nil, err + } + + response.TagNames = resp + // TODO: merge with other metrics as well, when we have them, return only InspectedBytes for now 
+ // return metrics with diff results + response.Metrics = &tempopb.MetadataMetrics{InspectedBytes: inspectedBytes.Load()} return response, nil }, } @@ -46,7 +62,8 @@ func NewTypedSearchTags(limitBytes int) GRPCCombiner[*tempopb.SearchTagsResponse func NewSearchTagsV2(limitBytes int) Combiner { // Distinct collector map to collect scopes and scope values - distinctValues := collector.NewScopedDistinctString(limitBytes) + distinctValues := collector.NewScopedDistinctStringWithDiff(limitBytes) + inspectedBytes := atomic.NewUint64(0) c := &genericCombiner[*tempopb.SearchTagsV2Response]{ httpStatusCode: 200, @@ -58,6 +75,9 @@ func NewSearchTagsV2(limitBytes int) Combiner { distinctValues.Collect(res.Name, tag) } } + if partial.Metrics != nil { + inspectedBytes.Add(partial.Metrics.InspectedBytes) + } return nil }, finalize: func(final *tempopb.SearchTagsV2Response) (*tempopb.SearchTagsV2Response, error) { @@ -70,13 +90,19 @@ func NewSearchTagsV2(limitBytes int) Combiner { Tags: vals, }) } + // return metrics with final results + // TODO: merge with other metrics as well, when we have them, return only InspectedBytes for now + final.Metrics = &tempopb.MetadataMetrics{InspectedBytes: inspectedBytes.Load()} return final, nil }, quit: func(_ *tempopb.SearchTagsV2Response) bool { return distinctValues.Exceeded() }, diff: func(response *tempopb.SearchTagsV2Response) (*tempopb.SearchTagsV2Response, error) { - collected := distinctValues.Diff() + collected, err := distinctValues.Diff() + if err != nil { + return nil, err + } response.Scopes = make([]*tempopb.SearchTagsV2Scope, 0, len(collected)) for scope, vals := range collected { @@ -85,7 +111,9 @@ func NewSearchTagsV2(limitBytes int) Combiner { Tags: vals, }) } - + // TODO: merge with other metrics as well, when we have them, return only InspectedBytes for now + // also return metrics with diff results + response.Metrics = &tempopb.MetadataMetrics{InspectedBytes: inspectedBytes.Load()} return response, nil }, } diff --git 
a/modules/frontend/combiner/search_tags_test.go b/modules/frontend/combiner/search_tags_test.go index 012775e823f..3fed17bc8f3 100644 --- a/modules/frontend/combiner/search_tags_test.go +++ b/modules/frontend/combiner/search_tags_test.go @@ -28,7 +28,7 @@ func TestTagsCombiner(t *testing.T) { factory: NewSearchTags, result1: &tempopb.SearchTagsResponse{TagNames: []string{"tag1"}}, result2: &tempopb.SearchTagsResponse{TagNames: []string{"tag2", "tag3"}}, - expectedResult: &tempopb.SearchTagsResponse{TagNames: []string{"tag1", "tag2", "tag3"}}, + expectedResult: &tempopb.SearchTagsResponse{TagNames: []string{"tag1", "tag2", "tag3"}, Metrics: &tempopb.MetadataMetrics{}}, actualResult: &tempopb.SearchTagsResponse{}, sort: func(m proto.Message) { sort.Strings(m.(*tempopb.SearchTagsResponse).TagNames) }, limit: 100, @@ -38,7 +38,7 @@ func TestTagsCombiner(t *testing.T) { factory: NewSearchTagsV2, result1: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v1"}}}}, result2: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v2", "v1"}}}}, - expectedResult: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v1", "v2"}}}}, + expectedResult: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v1", "v2"}}}, Metrics: &tempopb.MetadataMetrics{}}, actualResult: &tempopb.SearchTagsV2Response{}, sort: func(m proto.Message) { scopes := m.(*tempopb.SearchTagsV2Response).Scopes @@ -56,7 +56,7 @@ func TestTagsCombiner(t *testing.T) { factory: NewSearchTagValues, result1: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}}, result2: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag2", "tag3"}}, - expectedResult: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1", "tag2", "tag3"}}, + expectedResult: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1", "tag2", 
"tag3"}, Metrics: &tempopb.MetadataMetrics{}}, actualResult: &tempopb.SearchTagValuesResponse{}, sort: func(m proto.Message) { sort.Strings(m.(*tempopb.SearchTagValuesResponse).TagValues) }, limit: 100, @@ -66,7 +66,7 @@ func TestTagsCombiner(t *testing.T) { factory: NewSearchTagValuesV2, result1: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}}, result2: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v2", Type: "string"}, {Value: "v3", Type: "string"}}}, - expectedResult: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}, {Value: "v2", Type: "string"}, {Value: "v3", Type: "string"}}}, + expectedResult: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}, {Value: "v2", Type: "string"}, {Value: "v3", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{}}, actualResult: &tempopb.SearchTagValuesV2Response{}, sort: func(m proto.Message) { sort.Slice(m.(*tempopb.SearchTagValuesV2Response).TagValues, func(i, j int) bool { @@ -81,7 +81,7 @@ func TestTagsCombiner(t *testing.T) { factory: NewSearchTags, result1: &tempopb.SearchTagsResponse{TagNames: []string{"tag1"}}, result2: &tempopb.SearchTagsResponse{TagNames: []string{"tag2", "tag3"}}, - expectedResult: &tempopb.SearchTagsResponse{TagNames: []string{"tag1"}}, + expectedResult: &tempopb.SearchTagsResponse{TagNames: []string{"tag1"}, Metrics: &tempopb.MetadataMetrics{}}, actualResult: &tempopb.SearchTagsResponse{}, sort: func(m proto.Message) { sort.Strings(m.(*tempopb.SearchTagsResponse).TagNames) }, expectedShouldQuit: true, @@ -92,7 +92,7 @@ func TestTagsCombiner(t *testing.T) { factory: NewSearchTagsV2, result1: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v1"}}}}, result2: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v2", "v1"}}}}, - 
expectedResult: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v1"}}}}, + expectedResult: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v1"}}}, Metrics: &tempopb.MetadataMetrics{}}, actualResult: &tempopb.SearchTagsV2Response{}, sort: func(m proto.Message) { scopes := m.(*tempopb.SearchTagsV2Response).Scopes @@ -111,7 +111,7 @@ func TestTagsCombiner(t *testing.T) { factory: NewSearchTagValues, result1: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}}, result2: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag2", "tag3"}}, - expectedResult: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}}, + expectedResult: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}, Metrics: &tempopb.MetadataMetrics{}}, actualResult: &tempopb.SearchTagValuesResponse{}, sort: func(m proto.Message) { sort.Strings(m.(*tempopb.SearchTagValuesResponse).TagValues) }, expectedShouldQuit: true, @@ -122,7 +122,7 @@ func TestTagsCombiner(t *testing.T) { factory: NewSearchTagValuesV2, result1: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}}, result2: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v2", Type: "string"}, {Value: "v3", Type: "string"}}}, - expectedResult: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}}, + expectedResult: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{}}, actualResult: &tempopb.SearchTagValuesV2Response{}, sort: func(m proto.Message) { sort.Slice(m.(*tempopb.SearchTagValuesV2Response).TagValues, func(i, j int) bool { @@ -132,6 +132,59 @@ func TestTagsCombiner(t *testing.T) { expectedShouldQuit: true, limit: 10, }, + // with metrics + { + name: "SearchTags - metrics", + factory: NewSearchTags, + result1: 
&tempopb.SearchTagsResponse{TagNames: []string{"tag1"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}}, + result2: &tempopb.SearchTagsResponse{TagNames: []string{"tag2", "tag3"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}}, + expectedResult: &tempopb.SearchTagsResponse{TagNames: []string{"tag1", "tag2", "tag3"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}}, + actualResult: &tempopb.SearchTagsResponse{}, + sort: func(m proto.Message) { sort.Strings(m.(*tempopb.SearchTagsResponse).TagNames) }, + limit: 100, + }, + { + name: "SearchTagsV2 - metrics", + factory: NewSearchTagsV2, + result1: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v1"}}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}}, + result2: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v2", "v1"}}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}}, + expectedResult: &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"v1", "v2"}}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}}, + actualResult: &tempopb.SearchTagsV2Response{}, + sort: func(m proto.Message) { + scopes := m.(*tempopb.SearchTagsV2Response).Scopes + for _, scope := range scopes { + sort.Strings(scope.Tags) + } + sort.Slice(scopes, func(i, j int) bool { + return scopes[i].Name < scopes[j].Name + }) + }, + limit: 100, + }, + { + name: "SearchTagValues - metrics", + factory: NewSearchTagValues, + result1: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}}, + result2: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag2", "tag3"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}}, + expectedResult: &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1", "tag2", "tag3"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}}, + actualResult: &tempopb.SearchTagValuesResponse{}, 
+ sort: func(m proto.Message) { sort.Strings(m.(*tempopb.SearchTagValuesResponse).TagValues) }, + limit: 100, + }, + { + name: "SearchTagValuesV2 - metrics", + factory: NewSearchTagValuesV2, + result1: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}}, + result2: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v2", Type: "string"}, {Value: "v3", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}}, + expectedResult: &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}, {Value: "v2", Type: "string"}, {Value: "v3", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}}, + actualResult: &tempopb.SearchTagValuesV2Response{}, + sort: func(m proto.Message) { + sort.Slice(m.(*tempopb.SearchTagValuesV2Response).TagValues, func(i, j int) bool { + return m.(*tempopb.SearchTagValuesV2Response).TagValues[i].Value < m.(*tempopb.SearchTagValuesV2Response).TagValues[j].Value + }) + }, + limit: 100, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { @@ -154,27 +207,43 @@ func TestTagsCombiner(t *testing.T) { tc.sort(tc.expectedResult) tc.sort(tc.actualResult) require.Equal(t, tc.expectedResult, tc.actualResult) + + require.Equal(t, metrics(tc.expectedResult), metrics(tc.actualResult)) }) } } +func metrics(message proto.Message) *tempopb.MetadataMetrics { + switch m := message.(type) { + case *tempopb.SearchTagsResponse: + return m.Metrics + case *tempopb.SearchTagsV2Response: + return m.Metrics + case *tempopb.SearchTagValuesResponse: + return m.Metrics + case *tempopb.SearchTagValuesV2Response: + return m.Metrics + } + return nil +} + func TestTagsGRPCCombiner(t *testing.T) { c := NewTypedSearchTags(0) - res1 := &tempopb.SearchTagsResponse{TagNames: []string{"tag1"}} - res2 := &tempopb.SearchTagsResponse{TagNames: []string{"tag1", "tag2"}} - diff1 := 
&tempopb.SearchTagsResponse{TagNames: []string{"tag1"}} - diff2 := &tempopb.SearchTagsResponse{TagNames: []string{"tag2"}} - expectedFinal := &tempopb.SearchTagsResponse{TagNames: []string{"tag1", "tag2"}} + res1 := &tempopb.SearchTagsResponse{TagNames: []string{"tag1"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + res2 := &tempopb.SearchTagsResponse{TagNames: []string{"tag1", "tag2"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + diff1 := &tempopb.SearchTagsResponse{TagNames: []string{"tag1"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + diff2 := &tempopb.SearchTagsResponse{TagNames: []string{"tag2"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}} + expectedFinal := &tempopb.SearchTagsResponse{TagNames: []string{"tag1", "tag2"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}} testGRPCCombiner(t, c, res1, res2, diff1, diff2, expectedFinal, func(r *tempopb.SearchTagsResponse) { sort.Strings(r.TagNames) }) } func TestTagsV2GRPCCombiner(t *testing.T) { c := NewTypedSearchTagsV2(0) - res1 := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag1"}}}} - res2 := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag1", "tag2"}}, {Name: "scope2", Tags: []string{"tag3"}}}} - diff1 := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag1"}}}} - diff2 := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag2"}}, {Name: "scope2", Tags: []string{"tag3"}}}} - expectedFinal := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag1", "tag2"}}, {Name: "scope2", Tags: []string{"tag3"}}}} + res1 := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag1"}}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + res2 := 
&tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag1", "tag2"}}, {Name: "scope2", Tags: []string{"tag3"}}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + diff1 := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag1"}}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + diff2 := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag2"}}, {Name: "scope2", Tags: []string{"tag3"}}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}} + expectedFinal := &tempopb.SearchTagsV2Response{Scopes: []*tempopb.SearchTagsV2Scope{{Name: "scope1", Tags: []string{"tag1", "tag2"}}, {Name: "scope2", Tags: []string{"tag3"}}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}} testGRPCCombiner(t, c, res1, res2, diff1, diff2, expectedFinal, func(r *tempopb.SearchTagsV2Response) { for _, scope := range r.Scopes { sort.Strings(scope.Tags) @@ -187,21 +256,21 @@ func TestTagsV2GRPCCombiner(t *testing.T) { func TestTagValuesGRPCCombiner(t *testing.T) { c := NewTypedSearchTagValues(0) - res1 := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}} - res2 := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1", "tag2"}} - diff1 := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}} - diff2 := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag2"}} - expectedFinal := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1", "tag2"}} + res1 := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + res2 := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1", "tag2"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + diff1 := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + diff2 := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag2"}, 
Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}} + expectedFinal := &tempopb.SearchTagValuesResponse{TagValues: []string{"tag1", "tag2"}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}} testGRPCCombiner(t, c, res1, res2, diff1, diff2, expectedFinal, func(r *tempopb.SearchTagValuesResponse) { sort.Strings(r.TagValues) }) } func TestTagValuesV2GRPCCombiner(t *testing.T) { c := NewTypedSearchTagValuesV2(0) - res1 := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}} - res2 := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}, {Value: "v2", Type: "string"}}} - diff1 := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}} - diff2 := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v2", Type: "string"}}} - expectedFinal := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}, {Value: "v2", Type: "string"}}} + res1 := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + res2 := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}, {Value: "v2", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + diff1 := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 1}} + diff2 := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v2", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}} + expectedFinal := &tempopb.SearchTagValuesV2Response{TagValues: []*tempopb.TagValue{{Value: "v1", Type: "string"}, {Value: "v2", Type: "string"}}, Metrics: &tempopb.MetadataMetrics{InspectedBytes: 2}} testGRPCCombiner(t, c, res1, res2, diff1, diff2, expectedFinal, func(r 
*tempopb.SearchTagValuesV2Response) { sort.Slice(r.TagValues, func(i, j int) bool { return r.TagValues[i].Value < r.TagValues[j].Value diff --git a/modules/frontend/combiner/trace_by_id.go b/modules/frontend/combiner/trace_by_id.go index 24393bf8adb..dfdd886f431 100644 --- a/modules/frontend/combiner/trace_by_id.go +++ b/modules/frontend/combiner/trace_by_id.go @@ -2,6 +2,7 @@ package combiner import ( "bytes" + "errors" "fmt" "io" "net/http" @@ -83,6 +84,13 @@ func (c *traceByIDCombiner) AddResponse(r PipelineResponse) error { // Consume the trace _, err = c.c.Consume(resp.Trace) + + if errors.Is(err, trace.ErrTraceTooLarge) { + c.code = http.StatusUnprocessableEntity + c.statusMessage = fmt.Sprint(err) + return nil + } + return err } @@ -156,6 +164,11 @@ func (c *traceByIDCombiner) shouldQuit() bool { return false } + // test special case for 422 + if c.code == http.StatusUnprocessableEntity { + return true + } + // bail on other 400s if c.code/100 == 4 { return true diff --git a/modules/frontend/combiner/trace_by_id_test.go b/modules/frontend/combiner/trace_by_id_test.go index 0f605de8640..ac83e8ead6e 100644 --- a/modules/frontend/combiner/trace_by_id_test.go +++ b/modules/frontend/combiner/trace_by_id_test.go @@ -48,15 +48,15 @@ func TestTraceByIDShouldQuit(t *testing.T) { should = c.ShouldQuit() require.False(t, should) - // trace too large, should not quit but should return an error + // trace too large, should quit and should not return an error c = NewTraceByID(1, api.HeaderAcceptJSON) err = c.AddResponse(toHTTPProtoResponse(t, &tempopb.TraceByIDResponse{ Trace: test.MakeTrace(1, nil), Metrics: &tempopb.TraceByIDMetrics{}, }, 200)) - require.Error(t, err) + require.NoError(t, err) should = c.ShouldQuit() - require.False(t, should) + require.True(t, should) } func TestTraceByIDHonorsContentType(t *testing.T) { diff --git a/modules/frontend/config.go b/modules/frontend/config.go index 3d802466004..0ba194df4b2 100644 --- a/modules/frontend/config.go +++ 
b/modules/frontend/config.go @@ -2,13 +2,12 @@ package frontend import ( "flag" - "net/http" "time" "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/tempo/modules/frontend/transport" + "github.com/grafana/tempo/modules/frontend/pipeline" v1 "github.com/grafana/tempo/modules/frontend/v1" "github.com/grafana/tempo/pkg/usagestats" ) @@ -16,14 +15,14 @@ import ( var statVersion = usagestats.NewString("frontend_version") type Config struct { - Config v1.Config `yaml:",inline"` - MaxRetries int `yaml:"max_retries,omitempty"` - Search SearchConfig `yaml:"search"` - TraceByID TraceByIDConfig `yaml:"trace_by_id"` - Metrics MetricsConfig `yaml:"metrics"` - MultiTenantQueriesEnabled bool `yaml:"multi_tenant_queries_enabled"` - ResponseConsumers int `yaml:"response_consumers"` - + Config v1.Config `yaml:",inline"` + MaxRetries int `yaml:"max_retries,omitempty"` + Search SearchConfig `yaml:"search"` + TraceByID TraceByIDConfig `yaml:"trace_by_id"` + Metrics MetricsConfig `yaml:"metrics"` + MultiTenantQueriesEnabled bool `yaml:"multi_tenant_queries_enabled"` + ResponseConsumers int `yaml:"response_consumers"` + Weights pipeline.WeightsConfig `yaml:"weights"` // the maximum time limit that tempo will work on an api request. this includes both // grpc and http requests and applies to all "api" frontend query endpoints such as // traceql, tag search, tag value search, trace by id and all streaming gRPC endpoints. 
@@ -32,12 +31,16 @@ type Config struct { // A list of regexes for black listing requests, these will apply for every request regardless the endpoint URLDenyList []string `yaml:"url_deny_list,omitempty"` + + RequestWithWeights bool `yaml:"request_with_weights,omitempty"` + RetryWithWeights bool `yaml:"retry_with_weights,omitempty"` } type SearchConfig struct { - Timeout time.Duration `yaml:"timeout,omitempty"` - Sharder SearchSharderConfig `yaml:",inline"` - SLO SLOConfig `yaml:",inline"` + Timeout time.Duration `yaml:"timeout,omitempty"` + Sharder SearchSharderConfig `yaml:",inline"` + SLO SLOConfig `yaml:",inline"` + MetadataSLO SLOConfig `yaml:"metadata_slo,omitempty"` } type TraceByIDConfig struct { @@ -90,11 +93,16 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(string, *flag.FlagSet) { ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, Interval: 5 * time.Minute, - Exemplars: false, // TODO: Remove? MaxExemplars: 100, }, SLO: slo, } + cfg.Weights = pipeline.WeightsConfig{ + RequestWithWeights: true, + RetryWithWeights: true, + MaxRegexConditions: 1, + MaxTraceQLConditions: 4, + } // enable multi tenant queries by default cfg.MultiTenantQueriesEnabled = true @@ -107,12 +115,12 @@ type CortexNoQuerierLimits struct{} // Returned RoundTripper can be wrapped in more round-tripper middlewares, and then eventually registered // into HTTP server using the Handler from this package. Returned RoundTripper is always non-nil // (if there are no errors), and it uses the returned frontend (if any). -func InitFrontend(cfg v1.Config, log log.Logger, reg prometheus.Registerer) (http.RoundTripper, *v1.Frontend, error) { +func InitFrontend(cfg v1.Config, log log.Logger, reg prometheus.Registerer) (pipeline.RoundTripper, *v1.Frontend, error) { statVersion.Set("v1") // No scheduler = use original frontend. 
fr, err := v1.New(cfg, log, reg) if err != nil { return nil, nil, err } - return transport.AdaptGrpcRoundTripperToHTTPRoundTripper(fr), fr, nil + return fr, fr, nil } diff --git a/modules/frontend/frontend.go b/modules/frontend/frontend.go index 1c5938ee4c6..d4f73276f38 100644 --- a/modules/frontend/frontend.go +++ b/modules/frontend/frontend.go @@ -59,7 +59,7 @@ type QueryFrontend struct { var tracer = otel.Tracer("modules/frontend") // New returns a new QueryFrontend -func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempodb.Reader, cacheProvider cache.Provider, apiPrefix string, logger log.Logger, registerer prometheus.Registerer) (*QueryFrontend, error) { +func New(cfg Config, next pipeline.RoundTripper, o overrides.Interface, reader tempodb.Reader, cacheProvider cache.Provider, apiPrefix string, logger log.Logger, registerer prometheus.Registerer) (*QueryFrontend, error) { level.Info(logger).Log("msg", "creating middleware in query frontend") if cfg.TraceByID.QueryShards < minQueryShards || cfg.TraceByID.QueryShards > maxQueryShards { @@ -90,8 +90,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo return nil, fmt.Errorf("frontend metrics interval should be greater than 0") } - retryWare := pipeline.NewRetryWare(cfg.MaxRetries, registerer) - + retryWare := pipeline.NewRetryWare(cfg.MaxRetries, cfg.Weights.RetryWithWeights, registerer) cacheWare := pipeline.NewCachingWare(cacheProvider, cache.RoleFrontendSearch, logger) statusCodeWare := pipeline.NewStatusCodeAdjustWare() traceIDStatusCodeWare := pipeline.NewStatusCodeAdjustWareWithAllowedCode(http.StatusNotFound) @@ -101,6 +100,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo tracePipeline := pipeline.Build( []pipeline.AsyncMiddleware[combiner.PipelineResponse]{ urlDenyListWare, + pipeline.NewWeightRequestWare(pipeline.TraceByID, cfg.Weights), multiTenantMiddleware(cfg, logger), 
newAsyncTraceIDSharder(&cfg.TraceByID, logger), }, @@ -111,6 +111,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo []pipeline.AsyncMiddleware[combiner.PipelineResponse]{ urlDenyListWare, queryValidatorWare, + pipeline.NewWeightRequestWare(pipeline.TraceQLSearch, cfg.Weights), multiTenantMiddleware(cfg, logger), newAsyncSearchSharder(reader, o, cfg.Search.Sharder, logger), }, @@ -120,6 +121,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo searchTagsPipeline := pipeline.Build( []pipeline.AsyncMiddleware[combiner.PipelineResponse]{ urlDenyListWare, + pipeline.NewWeightRequestWare(pipeline.Default, cfg.Weights), multiTenantMiddleware(cfg, logger), newAsyncTagSharder(reader, o, cfg.Search.Sharder, parseTagsRequest, logger), }, @@ -129,6 +131,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo searchTagValuesPipeline := pipeline.Build( []pipeline.AsyncMiddleware[combiner.PipelineResponse]{ urlDenyListWare, + pipeline.NewWeightRequestWare(pipeline.Default, cfg.Weights), multiTenantMiddleware(cfg, logger), newAsyncTagSharder(reader, o, cfg.Search.Sharder, parseTagValuesRequest, logger), }, @@ -140,6 +143,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo []pipeline.AsyncMiddleware[combiner.PipelineResponse]{ urlDenyListWare, queryValidatorWare, + pipeline.NewWeightRequestWare(pipeline.Default, cfg.Weights), multiTenantUnsupportedMiddleware(cfg, logger), }, []pipeline.Middleware{statusCodeWare, retryWare}, @@ -150,8 +154,20 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo []pipeline.AsyncMiddleware[combiner.PipelineResponse]{ urlDenyListWare, queryValidatorWare, + pipeline.NewWeightRequestWare(pipeline.TraceQLMetrics, cfg.Weights), + multiTenantMiddleware(cfg, logger), + newAsyncQueryRangeSharder(reader, o, cfg.Metrics.Sharder, false, logger), + }, + []pipeline.Middleware{cacheWare, statusCodeWare, 
retryWare}, + next) + + queryInstantPipeline := pipeline.Build( + []pipeline.AsyncMiddleware[combiner.PipelineResponse]{ + urlDenyListWare, + queryValidatorWare, + pipeline.NewWeightRequestWare(pipeline.TraceQLMetrics, cfg.Weights), multiTenantMiddleware(cfg, logger), - newAsyncQueryRangeSharder(reader, o, cfg.Metrics.Sharder, logger), + newAsyncQueryRangeSharder(reader, o, cfg.Metrics.Sharder, true, logger), }, []pipeline.Middleware{cacheWare, statusCodeWare, retryWare}, next) @@ -159,13 +175,13 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo traces := newTraceIDHandler(cfg, tracePipeline, o, combiner.NewTraceByID, logger) tracesV2 := newTraceIDHandler(cfg, tracePipeline, o, combiner.NewTraceByIDV2, logger) search := newSearchHTTPHandler(cfg, searchPipeline, logger) - searchTags := newTagHTTPHandler(cfg, searchTagsPipeline, o, combiner.NewSearchTags, logger) - searchTagsV2 := newTagHTTPHandler(cfg, searchTagsPipeline, o, combiner.NewSearchTagsV2, logger) - searchTagValues := newTagHTTPHandler(cfg, searchTagValuesPipeline, o, combiner.NewSearchTagValues, logger) - searchTagValuesV2 := newTagHTTPHandler(cfg, searchTagValuesPipeline, o, combiner.NewSearchTagValuesV2, logger) + searchTags := newTagsHTTPHandler(cfg, searchTagsPipeline, o, logger) + searchTagsV2 := newTagsV2HTTPHandler(cfg, searchTagsPipeline, o, logger) + searchTagValues := newTagValuesHTTPHandler(cfg, searchTagValuesPipeline, o, logger) + searchTagValuesV2 := newTagValuesV2HTTPHandler(cfg, searchTagValuesPipeline, o, logger) metrics := newMetricsSummaryHandler(metricsPipeline, logger) - queryInstant := newMetricsQueryInstantHTTPHandler(cfg, queryRangePipeline, logger) // Reuses the same pipeline - queryrange := newMetricsQueryRangeHTTPHandler(cfg, queryRangePipeline, logger) + queryInstant := newMetricsQueryInstantHTTPHandler(cfg, queryInstantPipeline, logger) // Reuses the same pipeline + queryRange := newMetricsQueryRangeHTTPHandler(cfg, queryRangePipeline, 
logger) return &QueryFrontend{ // http/discrete @@ -178,12 +194,12 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo SearchTagsValuesV2Handler: newHandler(cfg.Config.LogQueryRequestHeaders, searchTagValuesV2, logger), MetricsSummaryHandler: newHandler(cfg.Config.LogQueryRequestHeaders, metrics, logger), MetricsQueryInstantHandler: newHandler(cfg.Config.LogQueryRequestHeaders, queryInstant, logger), - MetricsQueryRangeHandler: newHandler(cfg.Config.LogQueryRequestHeaders, queryrange, logger), + MetricsQueryRangeHandler: newHandler(cfg.Config.LogQueryRequestHeaders, queryRange, logger), // grpc/streaming streamingSearch: newSearchStreamingGRPCHandler(cfg, searchPipeline, apiPrefix, logger), - streamingTags: newTagStreamingGRPCHandler(cfg, searchTagsPipeline, apiPrefix, o, logger), - streamingTagsV2: newTagV2StreamingGRPCHandler(cfg, searchTagsPipeline, apiPrefix, o, logger), + streamingTags: newTagsStreamingGRPCHandler(cfg, searchTagsPipeline, apiPrefix, o, logger), + streamingTagsV2: newTagsV2StreamingGRPCHandler(cfg, searchTagsPipeline, apiPrefix, o, logger), streamingTagValues: newTagValuesStreamingGRPCHandler(cfg, searchTagValuesPipeline, apiPrefix, o, logger), streamingTagValuesV2: newTagValuesV2StreamingGRPCHandler(cfg, searchTagValuesPipeline, apiPrefix, o, logger), streamingQueryRange: newQueryRangeStreamingGRPCHandler(cfg, queryRangePipeline, apiPrefix, logger), diff --git a/modules/frontend/metrics_query_range_sharder.go b/modules/frontend/metrics_query_range_sharder.go index 6ffb54dd6af..76c66500de4 100644 --- a/modules/frontend/metrics_query_range_sharder.go +++ b/modules/frontend/metrics_query_range_sharder.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "math" - "net/http" "time" "github.com/go-kit/log" //nolint:all deprecated @@ -27,11 +26,12 @@ import ( ) type queryRangeSharder struct { - next pipeline.AsyncRoundTripper[combiner.PipelineResponse] - reader tempodb.Reader - overrides overrides.Interface - cfg 
QueryRangeSharderConfig - logger log.Logger + next pipeline.AsyncRoundTripper[combiner.PipelineResponse] + reader tempodb.Reader + overrides overrides.Interface + cfg QueryRangeSharderConfig + logger log.Logger + instantMode bool } type QueryRangeSharderConfig struct { @@ -40,28 +40,32 @@ type QueryRangeSharderConfig struct { MaxDuration time.Duration `yaml:"max_duration"` QueryBackendAfter time.Duration `yaml:"query_backend_after,omitempty"` Interval time.Duration `yaml:"interval,omitempty"` - Exemplars bool `yaml:"exemplars,omitempty"` MaxExemplars int `yaml:"max_exemplars,omitempty"` } // newAsyncQueryRangeSharder creates a sharding middleware for search -func newAsyncQueryRangeSharder(reader tempodb.Reader, o overrides.Interface, cfg QueryRangeSharderConfig, logger log.Logger) pipeline.AsyncMiddleware[combiner.PipelineResponse] { +func newAsyncQueryRangeSharder(reader tempodb.Reader, o overrides.Interface, cfg QueryRangeSharderConfig, instantMode bool, logger log.Logger) pipeline.AsyncMiddleware[combiner.PipelineResponse] { return pipeline.AsyncMiddlewareFunc[combiner.PipelineResponse](func(next pipeline.AsyncRoundTripper[combiner.PipelineResponse]) pipeline.AsyncRoundTripper[combiner.PipelineResponse] { return queryRangeSharder{ - next: next, - reader: reader, - overrides: o, - - cfg: cfg, - logger: logger, + next: next, + reader: reader, + overrides: o, + instantMode: instantMode, + cfg: cfg, + logger: logger, } }) } func (s queryRangeSharder) RoundTrip(pipelineRequest pipeline.Request) (pipeline.Responses[combiner.PipelineResponse], error) { r := pipelineRequest.HTTPRequest() + spanName := "frontend.QueryRangeSharder.range" + + if s.instantMode { + spanName = "frontend.QueryRangeSharder.instant" + } - ctx, span := tracer.Start(r.Context(), "frontend.QueryRangeSharder") + ctx, span := tracer.Start(r.Context(), spanName) defer span.End() req, err := api.ParseQueryRangeRequest(r) @@ -69,7 +73,7 @@ func (s queryRangeSharder) RoundTrip(pipelineRequest 
pipeline.Request) (pipeline return pipeline.NewBadRequest(err), nil } - expr, _, _, _, err := traceql.NewEngine().Compile(req.Query) + expr, _, _, _, err := traceql.Compile(req.Query) if err != nil { return pipeline.NewBadRequest(err), nil } @@ -89,24 +93,34 @@ func (s queryRangeSharder) RoundTrip(pipelineRequest pipeline.Request) (pipeline // Note: this is checked after alignment for consistency. maxDuration := s.maxDuration(tenantID) if maxDuration != 0 && time.Duration(req.End-req.Start)*time.Nanosecond > maxDuration { - err = fmt.Errorf(fmt.Sprintf("range specified by start and end (%s) exceeds %s. received start=%d end=%d", time.Duration(req.End-req.Start), maxDuration, req.Start, req.End)) + err = fmt.Errorf("range specified by start and end (%s) exceeds %s. received start=%d end=%d", time.Duration(req.End-req.Start), maxDuration, req.Start, req.End) return pipeline.NewBadRequest(err), nil } + var maxExemplars uint32 + // Instant queries must not compute exemplars + if !s.instantMode && s.cfg.MaxExemplars > 0 { + maxExemplars = req.Exemplars + if maxExemplars == 0 || maxExemplars > uint32(s.cfg.MaxExemplars) { + maxExemplars = uint32(s.cfg.MaxExemplars) // Enforce configuration + } + } + req.Exemplars = maxExemplars + var ( allowUnsafe = s.overrides.UnsafeQueryHints(tenantID) targetBytesPerRequest = s.jobSize(expr, allowUnsafe) cutoff = time.Now().Add(-s.cfg.QueryBackendAfter) ) - generatorReq := s.generatorRequest(*req, r, tenantID, cutoff) + generatorReq := s.generatorRequest(ctx, tenantID, pipelineRequest, *req, cutoff) reqCh := make(chan pipeline.Request, 2) // buffer of 2 allows us to insert generatorReq and metrics if generatorReq != nil { - reqCh <- pipeline.NewHTTPRequest(generatorReq) + reqCh <- generatorReq } - totalJobs, totalBlocks, totalBlockBytes := s.backendRequests(ctx, tenantID, r, *req, cutoff, targetBytesPerRequest, reqCh) + totalJobs, totalBlocks, totalBlockBytes := s.backendRequests(ctx, tenantID, pipelineRequest, *req, cutoff, 
targetBytesPerRequest, reqCh) span.SetAttributes(attribute.Int64("totalJobs", int64(totalJobs))) span.SetAttributes(attribute.Int64("totalBlocks", int64(totalBlocks))) @@ -151,14 +165,14 @@ func (s *queryRangeSharder) blockMetas(start, end int64, tenantID string) []*bac return metas } -func (s *queryRangeSharder) exemplarsPerShard(total uint32) uint32 { - if !s.cfg.Exemplars { +func (s *queryRangeSharder) exemplarsPerShard(total uint32, exemplars uint32) uint32 { + if exemplars == 0 { return 0 } - return uint32(math.Ceil(float64(s.cfg.MaxExemplars)*1.2)) / total + return uint32(math.Ceil(float64(exemplars)*1.2)) / total } -func (s *queryRangeSharder) backendRequests(ctx context.Context, tenantID string, parent *http.Request, searchReq tempopb.QueryRangeRequest, cutoff time.Time, targetBytesPerRequest int, reqCh chan pipeline.Request) (totalJobs, totalBlocks uint32, totalBlockBytes uint64) { +func (s *queryRangeSharder) backendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tempopb.QueryRangeRequest, cutoff time.Time, targetBytesPerRequest int, reqCh chan pipeline.Request) (totalJobs, totalBlocks uint32, totalBlockBytes uint64) { // request without start or end, search only in generator if searchReq.Start == 0 || searchReq.End == 0 { close(reqCh) @@ -204,13 +218,13 @@ func (s *queryRangeSharder) backendRequests(ctx context.Context, tenantID string return } -func (s *queryRangeSharder) buildBackendRequests(ctx context.Context, tenantID string, parent *http.Request, searchReq tempopb.QueryRangeRequest, metas []*backend.BlockMeta, targetBytesPerRequest int, reqCh chan<- pipeline.Request) { +func (s *queryRangeSharder) buildBackendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tempopb.QueryRangeRequest, metas []*backend.BlockMeta, targetBytesPerRequest int, reqCh chan<- pipeline.Request) { defer close(reqCh) queryHash := hashForQueryRangeRequest(&searchReq) colsToJSON := api.NewDedicatedColumnsToJSON() 
- exemplarsPerBlock := s.exemplarsPerShard(uint32(len(metas))) + exemplarsPerBlock := s.exemplarsPerShard(uint32(len(metas)), searchReq.Exemplars) for _, m := range metas { if m.EndTime.Before(m.StartTime) { // Ignore blocks with bad timings from debugging @@ -230,7 +244,7 @@ func (s *queryRangeSharder) buildBackendRequests(ctx context.Context, tenantID s } for startPage := 0; startPage < int(m.TotalRecords); startPage += pages { - subR := parent.Clone(ctx) + subR := parent.HTTPRequest().Clone(ctx) dedColsJSON, err := colsToJSON.JSONForDedicatedColumns(m.DedicatedColumns) if err != nil { @@ -268,7 +282,7 @@ func (s *queryRangeSharder) buildBackendRequests(ctx context.Context, tenantID s subR = api.BuildQueryRangeRequest(subR, queryRangeReq, dedColsJSON) prepareRequestForQueriers(subR, tenantID) - pipelineR := pipeline.NewHTTPRequest(subR) + pipelineR := parent.CloneFromHTTPRequest(subR) // TODO: Handle sampling rate key := queryRangeCacheKey(tenantID, queryHash, int64(queryRangeReq.Start), int64(queryRangeReq.End), m, int(queryRangeReq.StartPage), int(queryRangeReq.PagesToSearch)) @@ -292,23 +306,21 @@ func max(a, b uint32) uint32 { return b } -func (s *queryRangeSharder) generatorRequest(searchReq tempopb.QueryRangeRequest, parent *http.Request, tenantID string, cutoff time.Time) *http.Request { +func (s *queryRangeSharder) generatorRequest(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tempopb.QueryRangeRequest, cutoff time.Time) *pipeline.HTTPRequest { traceql.TrimToAfter(&searchReq, cutoff) - // if start == end then we don't need to query it if searchReq.Start == searchReq.End { return nil } searchReq.QueryMode = querier.QueryModeRecent - searchReq.Exemplars = uint32(s.cfg.MaxExemplars) // TODO: Review this - subR := parent.Clone(parent.Context()) + subR := parent.HTTPRequest().Clone(ctx) subR = api.BuildQueryRangeRequest(subR, &searchReq, "") // dedicated cols are never passed to the generators prepareRequestForQueriers(subR, 
tenantID) - return subR + return parent.CloneFromHTTPRequest(subR) } // maxDuration returns the max search duration allowed for this tenant. diff --git a/modules/frontend/pipeline/async_weight_middleware.go b/modules/frontend/pipeline/async_weight_middleware.go new file mode 100644 index 00000000000..7ba18cde2c6 --- /dev/null +++ b/modules/frontend/pipeline/async_weight_middleware.go @@ -0,0 +1,125 @@ +package pipeline + +import ( + "github.com/grafana/tempo/modules/frontend/combiner" + "github.com/grafana/tempo/pkg/traceql" +) + +type RequestType int + +type WeightRequest interface { + SetWeight(int) + Weight() int +} + +type WeightsConfig struct { + RequestWithWeights bool `yaml:"request_with_weights,omitempty"` + RetryWithWeights bool `yaml:"retry_with_weights,omitempty"` + MaxTraceQLConditions int `yaml:"max_traceql_conditions,omitempty"` + MaxRegexConditions int `yaml:"max_regex_conditions,omitempty"` +} + +type Weights struct { + DefaultWeight int + TraceQLSearchWeight int + TraceByIDWeight int + MaxTraceQLConditions int + MaxRegexConditions int +} + +const ( + Default RequestType = iota + TraceByID + TraceQLSearch + TraceQLMetrics +) + +type weightRequestWare struct { + requestType RequestType + enabled bool + next AsyncRoundTripper[combiner.PipelineResponse] + + weights Weights +} + +// It increments the weight of a retriyed request +func IncrementRetriedRequestWeight(r WeightRequest) { + r.SetWeight(r.Weight() + 1) +} + +// It returns a new weight request middleware +func NewWeightRequestWare(rt RequestType, cfg WeightsConfig) AsyncMiddleware[combiner.PipelineResponse] { + weights := Weights{ + DefaultWeight: 1, + TraceQLSearchWeight: 1, + TraceByIDWeight: 2, + MaxTraceQLConditions: cfg.MaxTraceQLConditions, + MaxRegexConditions: cfg.MaxRegexConditions, + } + return AsyncMiddlewareFunc[combiner.PipelineResponse](func(next AsyncRoundTripper[combiner.PipelineResponse]) AsyncRoundTripper[combiner.PipelineResponse] { + return &weightRequestWare{ + requestType: 
rt, + enabled: cfg.RequestWithWeights, + weights: weights, + next: next, + } + }) +} + +func (c weightRequestWare) RoundTrip(req Request) (Responses[combiner.PipelineResponse], error) { + c.setWeight(req) + return c.next.RoundTrip(req) +} + +func (c weightRequestWare) setWeight(req Request) { + if !c.enabled { + req.SetWeight(c.weights.DefaultWeight) + return + } + switch c.requestType { + case TraceByID: + req.SetWeight(c.weights.TraceByIDWeight) + case TraceQLSearch, TraceQLMetrics: + c.setTraceQLWeight(req) + default: + req.SetWeight(c.weights.DefaultWeight) + } +} + +func (c weightRequestWare) setTraceQLWeight(req Request) { + var traceQLQuery string + query := req.HTTPRequest().URL.Query() + if query.Has("q") { + traceQLQuery = query.Get("q") + } + if query.Has("query") { + traceQLQuery = query.Get("query") + } + + req.SetWeight(c.weights.TraceQLSearchWeight) + + if traceQLQuery == "" { + return + } + + _, _, _, spanRequest, err := traceql.Compile(traceQLQuery) + if err != nil || spanRequest == nil { + return + } + + conditions := 0 + regexConditions := 0 + + for _, c := range spanRequest.Conditions { + if c.Op != traceql.OpNone { + conditions++ + } + if c.Op == traceql.OpRegex || c.Op == traceql.OpNotRegex { + regexConditions++ + } + } + complexQuery := regexConditions >= c.weights.MaxRegexConditions || conditions >= c.weights.MaxTraceQLConditions + if complexQuery { + req.SetWeight(c.weights.TraceQLSearchWeight + 1) + } +} diff --git a/modules/frontend/pipeline/async_weight_middleware_test.go b/modules/frontend/pipeline/async_weight_middleware_test.go new file mode 100644 index 00000000000..ae4f63f8b5a --- /dev/null +++ b/modules/frontend/pipeline/async_weight_middleware_test.go @@ -0,0 +1,115 @@ +package pipeline + +import ( + "bytes" + "context" + "io" + "net/http" + "testing" + + "github.com/grafana/tempo/modules/frontend/combiner" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var nextRequest = 
AsyncRoundTripperFunc[combiner.PipelineResponse](func(_ Request) (Responses[combiner.PipelineResponse], error) { + return NewHTTPToAsyncResponse(&http.Response{ + StatusCode: 200, + Body: io.NopCloser(bytes.NewReader([]byte{})), + }), nil +}) + +const ( + DefaultWeight int = 1 + TraceQLSearchWeight int = 1 + TraceByIDWeight int = 2 +) + +func TestWeightMiddlewareForTraceByIDRequest(t *testing.T) { + config := WeightsConfig{ + RequestWithWeights: true, + } + roundTrip := NewWeightRequestWare(TraceByID, config).Wrap(nextRequest) + req := DoWeightedRequest(t, "http://localhost:8080/api/v2/traces/123345", roundTrip) + + assert.Equal(t, TraceByIDWeight, req.Weight()) +} + +func TestDisabledWeightMiddlewareForTraceByIDRequest(t *testing.T) { + config := WeightsConfig{ + RequestWithWeights: false, + } + roundTrip := NewWeightRequestWare(TraceByID, config).Wrap(nextRequest) + req := DoWeightedRequest(t, "http://localhost:8080/api/v2/traces/123345", roundTrip) + + assert.Equal(t, DefaultWeight, req.Weight()) +} + +func TestWeightMiddlewareForDefaultRequest(t *testing.T) { + config := WeightsConfig{ + RequestWithWeights: true, + } + roundTrip := NewWeightRequestWare(Default, config).Wrap(nextRequest) + req := DoWeightedRequest(t, "http://localhost:8080/api/v2/search/tags", roundTrip) + + assert.Equal(t, DefaultWeight, req.Weight()) +} + +func TestWeightMiddlewareForTraceQLRequest(t *testing.T) { + config := WeightsConfig{ + RequestWithWeights: true, + MaxTraceQLConditions: 4, + MaxRegexConditions: 1, + } + roundTrip := NewWeightRequestWare(TraceQLSearch, config).Wrap(nextRequest) + cases := []struct { + req string + expected int + }{ + { + // Wrong query, this will be catched by the validator middlware + req: "http://localhost:3200/api/search?q={ span.http.status_code }", + expected: TraceQLSearchWeight, + }, + { + // Simple query + req: "http://localhost:3200/api/search?q={ span.http.status_code >= 200 }", + expected: TraceQLSearchWeight, + }, + { + // Simple query + req: 
"http://localhost:3200/api/search?q={ span.http.status_code >= 200 || span.http.status_code < 300 }", + expected: TraceQLSearchWeight, + }, + { + // Regex, complex query + req: "http://localhost:8080/api/search?query={span.a =~ \"postgresql|mysql\"}", + expected: TraceQLSearchWeight + 1, + }, + { + // Regex, complex query + req: "http://localhost:8080/api/search?query={span.a !~ \"postgresql|mysql\"}", + expected: TraceQLSearchWeight + 1, + }, + { + // 4 conditions, complex query + req: "http://localhost:8080/api/search?query={span.http.method = \"DELETE\" || status != ok || span.http.status_code >= 200 || span.http.status_code < 300 }", + expected: TraceQLSearchWeight + 1, + }, + } + for _, c := range cases { + actual := DoWeightedRequest(t, c.req, roundTrip) + if actual.Weight() != c.expected { + t.Errorf("expected %d, got %d", c.expected, actual.Weight()) + } + } +} + +func DoWeightedRequest(t *testing.T, url string, rt AsyncRoundTripper[combiner.PipelineResponse]) *HTTPRequest { + req, _ := http.NewRequest(http.MethodGet, url, nil) + request := NewHTTPRequest(req) + resp, _ := rt.RoundTrip(request) + _, _, err := resp.Next(context.Background()) + require.NoError(t, err) + return request +} diff --git a/modules/frontend/pipeline/pipeline.go b/modules/frontend/pipeline/pipeline.go index 0d3647b223a..920cd4eb203 100644 --- a/modules/frontend/pipeline/pipeline.go +++ b/modules/frontend/pipeline/pipeline.go @@ -15,11 +15,15 @@ type Request interface { Context() context.Context WithContext(context.Context) + Weight() int + SetWeight(int) + SetCacheKey(string) CacheKey() string SetResponseData(any) // add data that will be sent back with this requests response ResponseData() any + CloneFromHTTPRequest(request *http.Request) *HTTPRequest } type HTTPRequest struct { @@ -27,6 +31,7 @@ type HTTPRequest struct { cacheKey string responseData any + weight int } func NewHTTPRequest(req *http.Request) *HTTPRequest { @@ -65,6 +70,18 @@ func (r *HTTPRequest) ResponseData() any { 
return r.responseData } +func (r *HTTPRequest) Weight() int { + return r.weight +} + +func (r *HTTPRequest) SetWeight(w int) { + r.weight = w +} + +func (r *HTTPRequest) CloneFromHTTPRequest(request *http.Request) *HTTPRequest { + return &HTTPRequest{req: request, weight: r.weight} +} + // // Async Pipeline // @@ -125,7 +142,7 @@ func (f MiddlewareFunc) Wrap(w RoundTripper) RoundTripper { // // Build takes a slice of async, sync middleware and a http.RoundTripper and builds a request pipeline -func Build(asyncMW []AsyncMiddleware[combiner.PipelineResponse], mw []Middleware, next http.RoundTripper) AsyncRoundTripper[combiner.PipelineResponse] { +func Build(asyncMW []AsyncMiddleware[combiner.PipelineResponse], mw []Middleware, next RoundTripper) AsyncRoundTripper[combiner.PipelineResponse] { asyncPipeline := AsyncMiddlewareFunc[combiner.PipelineResponse](func(next AsyncRoundTripper[combiner.PipelineResponse]) AsyncRoundTripper[combiner.PipelineResponse] { for i := len(asyncMW) - 1; i >= 0; i-- { next = asyncMW[i].Wrap(next) @@ -143,7 +160,7 @@ func Build(asyncMW []AsyncMiddleware[combiner.PipelineResponse], mw []Middleware // bridge the two pipelines bridge := &pipelineBridge{ next: syncPipeline.Wrap(RoundTripperFunc(func(req Request) (*http.Response, error) { - return next.RoundTrip(req.HTTPRequest()) + return next.RoundTrip(req) })), convert: NewHTTPToAsyncResponse, } diff --git a/modules/frontend/pipeline/sync_handler_adjust_response_code.go b/modules/frontend/pipeline/sync_handler_adjust_response_code.go index a7c9a362af1..2c06d55052a 100644 --- a/modules/frontend/pipeline/sync_handler_adjust_response_code.go +++ b/modules/frontend/pipeline/sync_handler_adjust_response_code.go @@ -44,8 +44,9 @@ func (c statusCodeAdjustWare) RoundTrip(req Request) (*http.Response, error) { // if the frontend issues a bad request then externally we need to represent that as an // internal error // exceptions + // 422 - unprocessable entity // 429 - too many requests - if 
resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 { + if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 && resp.StatusCode != 422 { resp.StatusCode = http.StatusInternalServerError resp.Status = http.StatusText(http.StatusInternalServerError) // leave the body alone. it will preserve the original error message diff --git a/modules/frontend/pipeline/sync_handler_retry.go b/modules/frontend/pipeline/sync_handler_retry.go index 535eed2ce13..b8ad87fed5a 100644 --- a/modules/frontend/pipeline/sync_handler_retry.go +++ b/modules/frontend/pipeline/sync_handler_retry.go @@ -16,7 +16,7 @@ import ( "go.opentelemetry.io/otel/trace" ) -func NewRetryWare(maxRetries int, registerer prometheus.Registerer) Middleware { +func NewRetryWare(maxRetries int, incrementRetriedRequestWeight bool, registerer prometheus.Registerer) Middleware { retriesCount := promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ Namespace: "tempo", Name: "query_frontend_retries", @@ -29,17 +29,19 @@ func NewRetryWare(maxRetries int, registerer prometheus.Registerer) Middleware { return MiddlewareFunc(func(next RoundTripper) RoundTripper { return retryWare{ - next: next, - maxRetries: maxRetries, - retriesCount: retriesCount, + next: next, + maxRetries: maxRetries, + retriesCount: retriesCount, + incrementRetriedRequestWeight: incrementRetriedRequestWeight, } }) } type retryWare struct { - next RoundTripper - maxRetries int - retriesCount prometheus.Histogram + next RoundTripper + maxRetries int + incrementRetriedRequestWeight bool + retriesCount prometheus.Histogram } // RoundTrip implements http.RoundTripper @@ -61,6 +63,10 @@ func (r retryWare) RoundTrip(req Request) (*http.Response, error) { resp, err := r.next.RoundTrip(req) + if ctx.Err() != nil { + return nil, ctx.Err() + } + if r.maxRetries == 0 { return resp, err } @@ -96,6 +102,12 @@ func (r retryWare) RoundTrip(req Request) (*http.Response, error) { return resp, err } + // retries have 
their weight bumped. a common retry reason is the request was simply too large to process + // bumping weights should help spread the load + if r.incrementRetriedRequestWeight { + IncrementRetriedRequestWeight(req) + } + statusCode := 0 if resp != nil { statusCode = resp.StatusCode diff --git a/modules/frontend/pipeline/sync_handler_retry_test.go b/modules/frontend/pipeline/sync_handler_retry_test.go index 5d57db7c6d1..6f004c57a33 100644 --- a/modules/frontend/pipeline/sync_handler_retry_test.go +++ b/modules/frontend/pipeline/sync_handler_retry_test.go @@ -109,7 +109,7 @@ func TestRetry(t *testing.T) { t.Run(tc.name, func(t *testing.T) { try.Store(0) - retryWare := NewRetryWare(tc.maxRetries, prometheus.NewRegistry()) + retryWare := NewRetryWare(tc.maxRetries, true, prometheus.NewRegistry()) handler := retryWare.Wrap(tc.handler) req := httptest.NewRequest("GET", "http://example.com", nil) @@ -133,7 +133,7 @@ func TestRetry_CancelledRequest(t *testing.T) { req, err := http.NewRequestWithContext(ctx, "GET", "http://example.com", nil) require.NoError(t, err) - _, err = NewRetryWare(5, prometheus.NewRegistry()). + _, err = NewRetryWare(5, false, prometheus.NewRegistry()). Wrap(RoundTripperFunc(func(_ Request) (*http.Response, error) { try.Inc() return nil, ctx.Err() @@ -148,7 +148,7 @@ func TestRetry_CancelledRequest(t *testing.T) { req, err = http.NewRequestWithContext(ctx, "GET", "http://example.com", nil) require.NoError(t, err) - _, err = NewRetryWare(5, prometheus.NewRegistry()). + _, err = NewRetryWare(5, false, prometheus.NewRegistry()). Wrap(RoundTripperFunc(func(_ Request) (*http.Response, error) { try.Inc() cancel() diff --git a/modules/frontend/queue/queue.go b/modules/frontend/queue/queue.go index 75ef6db0bfc..22b0f4d7475 100644 --- a/modules/frontend/queue/queue.go +++ b/modules/frontend/queue/queue.go @@ -39,7 +39,9 @@ func FirstUser() UserIndex { } // Request stored into the queue. 
-type Request interface{} +type Request interface { + Weight() int +} // RequestQueue holds incoming requests in per-user queues. type RequestQueue struct { @@ -160,18 +162,7 @@ FindQueue: last.last = idx if queue != nil { // this is all threadsafe b/c all users queues are blocked by q.mtx - if len(queue) < requestedCount { - requestedCount = len(queue) - } - - // Pick next requests from the queue. - batchBuffer = batchBuffer[:requestedCount] - for i := 0; i < requestedCount; i++ { - batchBuffer[i] = <-queue - } - - q.queueLength.WithLabelValues(userID).Set(float64(len(queue))) - + batchBuffer := q.getBatchBuffer(batchBuffer, userID, queue) return batchBuffer, last, nil } @@ -181,6 +172,31 @@ FindQueue: goto FindQueue } +func (q *RequestQueue) getBatchBuffer(batchBuffer []Request, userID string, queue chan Request) []Request { + requestedCount := len(batchBuffer) + guaranteedInQueue := requestedCount + + if len(queue) < requestedCount { + guaranteedInQueue = len(queue) + } + + totalWeight := 0 + actuallyInBatch := 0 + for i := 0; i < guaranteedInQueue; i++ { + batchBuffer[i] = <-queue + actuallyInBatch++ + totalWeight += batchBuffer[i].Weight() + + if totalWeight >= requestedCount { + break + } + } + batchBuffer = batchBuffer[:actuallyInBatch] + + q.queueLength.WithLabelValues(userID).Set(float64(len(queue))) + return batchBuffer +} + func (q *RequestQueue) cleanupQueues(_ context.Context) error { q.mtx.Lock() defer q.mtx.Unlock() diff --git a/modules/frontend/queue/queue_test.go b/modules/frontend/queue/queue_test.go index 80d2ea2367d..dc877dc0905 100644 --- a/modules/frontend/queue/queue_test.go +++ b/modules/frontend/queue/queue_test.go @@ -16,9 +16,17 @@ import ( const messages = 50_000 -type mockRequest struct{} +type mockRequest struct { + weight int +} func (r *mockRequest) Invalid() bool { return false } +func (r *mockRequest) Weight() int { + if r.weight > 0 { + return r.weight + } + return 1 +} func TestGetNextForQuerierOneUser(t *testing.T) { messages := 
10 @@ -28,7 +36,7 @@ func TestGetNextForQuerierOneUser(t *testing.T) { stop := make(chan struct{}) requestsPulled := atomic.NewInt32(0) - q, start := queueWithListeners(ctx, 100, 1, func(r []Request) { + q, start := queueWithListeners(ctx, 100, 1, func(_ []Request) { i := requestsPulled.Inc() if i == int32(messages) { close(stop) @@ -57,7 +65,7 @@ func TestGetNextForQuerierRandomUsers(t *testing.T) { stop := make(chan struct{}) requestsPulled := atomic.NewInt32(0) - q, start := queueWithListeners(ctx, 100, 1, func(r []Request) { + q, start := queueWithListeners(ctx, 100, 1, func(_ []Request) { if requestsPulled.Inc() == int32(messages) { close(stop) } @@ -125,7 +133,7 @@ func benchmarkGetNextForQuerier(b *testing.B, listeners int, messages int) { stop := make(chan struct{}) requestsPulled := atomic.NewInt32(0) - q, start := queueWithListeners(ctx, listeners, 1, func(r []Request) { + q, start := queueWithListeners(ctx, listeners, 1, func(_ []Request) { if requestsPulled.Inc() == int32(messages) { stop <- struct{}{} } @@ -323,6 +331,66 @@ func TestContextCond(t *testing.T) { }) } +func TestGetBatchBuffer(t *testing.T) { + tests := []struct { + name string + queueContents []Request + requestedCount int + expectedCount int + }{ + { + name: "exactly requested count", + queueContents: []Request{&mockRequest{}, &mockRequest{}, &mockRequest{}}, + requestedCount: 3, + expectedCount: 3, + }, + { + name: "less than requested count", + queueContents: []Request{&mockRequest{}, &mockRequest{}}, + requestedCount: 3, + expectedCount: 2, + }, + { + name: "more than requested count", + queueContents: []Request{&mockRequest{}, &mockRequest{}, &mockRequest{}, &mockRequest{}}, + requestedCount: 3, + expectedCount: 3, + }, + { + name: "less than requested count due to biggest weight", + queueContents: []Request{&mockRequest{10}}, + requestedCount: 3, + expectedCount: 1, + }, + { + name: "empty queue", + queueContents: []Request{}, + requestedCount: 3, + expectedCount: 0, + }, + } + + 
for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + queue := make(chan Request, len(tt.queueContents)) + for _, req := range tt.queueContents { + queue <- req + } + + q := &RequestQueue{ + queueLength: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "test_len", + }, []string{"user"}), + } + + batchBuffer := make([]Request, tt.requestedCount) + result := q.getBatchBuffer(batchBuffer, "user", queue) + + assert.Equal(t, tt.expectedCount, len(result)) + }) + } +} + func assertChanReceived(t *testing.T, c chan struct{}, timeout time.Duration, msg string) { t.Helper() diff --git a/modules/frontend/search_handlers.go b/modules/frontend/search_handlers.go index 0cf7e98fb8c..35875832016 100644 --- a/modules/frontend/search_handlers.go +++ b/modules/frontend/search_handlers.go @@ -50,8 +50,8 @@ func newSearchStreamingGRPCHandler(cfg Config, next pipeline.AsyncRoundTripper[c } var finalResponse *tempopb.SearchResponse - c := combiner.NewTypedSearch(int(limit)) - collector := pipeline.NewGRPCCollector[*tempopb.SearchResponse](next, cfg.ResponseConsumers, c, func(sr *tempopb.SearchResponse) error { + comb := combiner.NewTypedSearch(int(limit)) + collector := pipeline.NewGRPCCollector[*tempopb.SearchResponse](next, cfg.ResponseConsumers, comb, func(sr *tempopb.SearchResponse) error { finalResponse = sr // sadly we can't srv.Send directly into the collector. we need bytesProcessed for the SLO calculations return srv.Send(sr) }) @@ -103,14 +103,14 @@ func newSearchHTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.P logRequest(logger, tenant, searchReq) // build and use roundtripper - combiner := combiner.NewTypedSearch(int(limit)) - rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, combiner) + comb := combiner.NewTypedSearch(int(limit)) + rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, comb) resp, err := rt.RoundTrip(req) // ask for the typed diff and use that for the SLO hook. 
it will have up to date metrics var bytesProcessed uint64 - searchResp, _ := combiner.GRPCDiff() + searchResp, _ := comb.GRPCDiff() if searchResp != nil && searchResp.Metrics != nil { bytesProcessed = searchResp.Metrics.InspectedBytes } diff --git a/modules/frontend/search_handlers_test.go b/modules/frontend/search_handlers_test.go index b81ff9f40d6..05766eb78ba 100644 --- a/modules/frontend/search_handlers_test.go +++ b/modules/frontend/search_handlers_test.go @@ -25,6 +25,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" + "github.com/grafana/tempo/modules/frontend/pipeline" "github.com/grafana/tempo/modules/overrides" "github.com/grafana/tempo/pkg/api" "github.com/grafana/tempo/pkg/cache" @@ -37,7 +38,7 @@ import ( "github.com/grafana/tempo/tempodb/backend" ) -var _ http.RoundTripper = &mockRoundTripper{} +var _ pipeline.RoundTripper = &mockRoundTripper{} type mockRoundTripper struct { err error @@ -48,7 +49,7 @@ type mockRoundTripper struct { responseFn func() proto.Message } -func (s *mockRoundTripper) RoundTrip(_ *http.Request) (*http.Response, error) { +func (s *mockRoundTripper) RoundTrip(_ pipeline.Request) (*http.Response, error) { // only return errors once, then do a good response to make sure that the combiner is handling the error correctly var err error var errResponse *http.Response @@ -708,7 +709,7 @@ func BenchmarkSearchPipeline(b *testing.B) { // frontendWithSettings returns a new frontend with the given settings. 
any nil options // are given "happy path" defaults -func frontendWithSettings(t require.TestingT, next http.RoundTripper, rdr tempodb.Reader, cfg *Config, cacheProvider cache.Provider, +func frontendWithSettings(t require.TestingT, next pipeline.RoundTripper, rdr tempodb.Reader, cfg *Config, cacheProvider cache.Provider, opts ...func(*Config), ) *QueryFrontend { if next == nil { diff --git a/modules/frontend/search_sharder.go b/modules/frontend/search_sharder.go index 9799a18d242..a8e1df35ec8 100644 --- a/modules/frontend/search_sharder.go +++ b/modules/frontend/search_sharder.go @@ -3,7 +3,6 @@ package frontend import ( "context" "fmt" - "net/http" "time" "github.com/go-kit/log" //nolint:all deprecated @@ -96,7 +95,7 @@ func (s asyncSearchSharder) RoundTrip(pipelineRequest pipeline.Request) (pipelin // build request to search ingesters based on query_ingesters_until config and time range // pass subCtx in requests so we can cancel and exit early - err = s.ingesterRequests(ctx, tenantID, r, *searchReq, reqCh) + err = s.ingesterRequests(ctx, tenantID, pipelineRequest, *searchReq, reqCh) if err != nil { return nil, err } @@ -106,7 +105,7 @@ func (s asyncSearchSharder) RoundTrip(pipelineRequest pipeline.Request) (pipelin ingesterJobs := len(reqCh) // pass subCtx in requests so we can cancel and exit early - totalJobs, totalBlocks, totalBlockBytes := s.backendRequests(ctx, tenantID, r, searchReq, reqCh, func(err error) { + totalJobs, totalBlocks, totalBlockBytes := s.backendRequests(ctx, tenantID, pipelineRequest, searchReq, reqCh, func(err error) { // todo: actually find a way to return this error to the user s.logger.Log("msg", "search: failed to build backend requests", "err", err) }) @@ -154,7 +153,7 @@ func (s *asyncSearchSharder) blockMetas(start, end int64, tenantID string) []*ba // backendRequest builds backend requests to search backend blocks. backendRequest takes ownership of reqCh and closes it. 
// it returns 3 int values: totalBlocks, totalBlockBytes, and estimated jobs -func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID string, parent *http.Request, searchReq *tempopb.SearchRequest, reqCh chan<- pipeline.Request, errFn func(error)) (totalJobs, totalBlocks int, totalBlockBytes uint64) { +func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, reqCh chan<- pipeline.Request, errFn func(error)) (totalJobs, totalBlocks int, totalBlockBytes uint64) { var blocks []*backend.BlockMeta // request without start or end, search only in ingester @@ -200,7 +199,7 @@ func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID strin // that covers the ingesters. If nil is returned for the http.Request then there is no ingesters query. // since this function modifies searchReq.Start and End we are taking a value instead of a pointer to prevent it from // unexpectedly changing the passed searchReq. -func (s *asyncSearchSharder) ingesterRequests(ctx context.Context, tenantID string, parent *http.Request, searchReq tempopb.SearchRequest, reqCh chan pipeline.Request) error { +func (s *asyncSearchSharder) ingesterRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tempopb.SearchRequest, reqCh chan pipeline.Request) error { // request without start or end, search only in ingester if searchReq.Start == 0 || searchReq.End == 0 { return buildIngesterRequest(ctx, tenantID, parent, &searchReq, reqCh) @@ -298,7 +297,7 @@ func backendRange(start, end uint32, queryBackendAfter time.Duration) (uint32, u // buildBackendRequests returns a slice of requests that cover all blocks in the store // that are covered by start/end. 
-func buildBackendRequests(ctx context.Context, tenantID string, parent *http.Request, searchReq *tempopb.SearchRequest, metas []*backend.BlockMeta, bytesPerRequest int, reqCh chan<- pipeline.Request, errFn func(error)) { +func buildBackendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, metas []*backend.BlockMeta, bytesPerRequest int, reqCh chan<- pipeline.Request, errFn func(error)) { defer close(reqCh) queryHash := hashForSearchRequest(searchReq) @@ -312,7 +311,7 @@ func buildBackendRequests(ctx context.Context, tenantID string, parent *http.Req blockID := m.BlockID.String() for startPage := 0; startPage < int(m.TotalRecords); startPage += pages { - subR := parent.Clone(ctx) + subR := parent.HTTPRequest().Clone(ctx) dedColsJSON, err := colsToJSON.JSONForDedicatedColumns(m.DedicatedColumns) if err != nil { @@ -340,7 +339,7 @@ func buildBackendRequests(ctx context.Context, tenantID string, parent *http.Req prepareRequestForQueriers(subR, tenantID) key := searchJobCacheKey(tenantID, queryHash, int64(searchReq.Start), int64(searchReq.End), m, startPage, pages) - pipelineR := pipeline.NewHTTPRequest(subR) + pipelineR := parent.CloneFromHTTPRequest(subR) pipelineR.SetCacheKey(key) select { @@ -397,14 +396,14 @@ func pagesPerRequest(m *backend.BlockMeta, bytesPerRequest int) int { return pagesPerQuery } -func buildIngesterRequest(ctx context.Context, tenantID string, parent *http.Request, searchReq *tempopb.SearchRequest, reqCh chan pipeline.Request) error { - subR := parent.Clone(ctx) +func buildIngesterRequest(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, reqCh chan pipeline.Request) error { + subR := parent.HTTPRequest().Clone(ctx) subR, err := api.BuildSearchRequest(subR, searchReq) if err != nil { return err } prepareRequestForQueriers(subR, tenantID) - reqCh <- pipeline.NewHTTPRequest(subR) + reqCh <- parent.CloneFromHTTPRequest(subR) return nil } diff 
--git a/modules/frontend/search_sharder_test.go b/modules/frontend/search_sharder_test.go index e38e587a7ce..ed8eb3abe6f 100644 --- a/modules/frontend/search_sharder_test.go +++ b/modules/frontend/search_sharder_test.go @@ -8,7 +8,6 @@ import ( "net/http" "net/http/httptest" "net/url" - "reflect" "strconv" "strings" "testing" @@ -46,7 +45,7 @@ func (m *mockReader) SearchTags(context.Context, *backend.BlockMeta, string, com return nil, nil } -func (m *mockReader) SearchTagValues(context.Context, *backend.BlockMeta, string, common.SearchOptions) ([]string, error) { +func (m *mockReader) SearchTagValues(context.Context, *backend.BlockMeta, string, common.SearchOptions) (*tempopb.SearchTagValuesResponse, error) { return nil, nil } @@ -54,7 +53,7 @@ func (m *mockReader) SearchTagValuesV2(context.Context, *backend.BlockMeta, *tem return nil, nil } -func (m *mockReader) FetchTagValues(context.Context, *backend.BlockMeta, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.SearchOptions) error { +func (m *mockReader) FetchTagValues(context.Context, *backend.BlockMeta, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return nil } @@ -74,7 +73,7 @@ func (m *mockReader) Fetch(context.Context, *backend.BlockMeta, traceql.FetchSpa return traceql.FetchSpansResponse{}, nil } -func (m *mockReader) FetchTagNames(_ context.Context, _ *backend.BlockMeta, _ traceql.FetchTagsRequest, _ traceql.FetchTagsCallback, _ common.SearchOptions) error { +func (m *mockReader) FetchTagNames(context.Context, *backend.BlockMeta, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.MetricsCallback, common.SearchOptions) error { return nil } @@ -227,7 +226,7 @@ func TestBuildBackendRequests(t *testing.T) { reqCh := make(chan pipeline.Request) go func() { - buildBackendRequests(ctx, "test", req, searchReq, tc.metas, tc.targetBytesPerRequest, reqCh, cancelCause) + buildBackendRequests(ctx, "test", 
pipeline.NewHTTPRequest(req), searchReq, tc.metas, tc.targetBytesPerRequest, reqCh, cancelCause) }() actualURIs := []string{} @@ -317,8 +316,8 @@ func TestBackendRequests(t *testing.T) { reqCh := make(chan pipeline.Request) ctx, cancelCause := context.WithCancelCause(context.Background()) - - jobs, blocks, blockBytes := s.backendRequests(ctx, "test", r, searchReq, reqCh, cancelCause) + pipelineRequest := pipeline.NewHTTPRequest(r) + jobs, blocks, blockBytes := s.backendRequests(ctx, "test", pipelineRequest, searchReq, reqCh, cancelCause) require.Equal(t, tc.expectedJobs, jobs) require.Equal(t, tc.expectedBlocks, blocks) require.Equal(t, tc.expectedBlockBytes, blockBytes) @@ -493,8 +492,9 @@ func TestIngesterRequests(t *testing.T) { reqChan := make(chan pipeline.Request, tc.ingesterShards) defer close(reqChan) - copyReq := searchReq - err = s.ingesterRequests(context.Background(), "test", req, *searchReq, reqChan) + pr := pipeline.NewHTTPRequest(req) + pr.SetWeight(2) + err = s.ingesterRequests(context.Background(), "test", pr, *searchReq, reqChan) if tc.expectedError != nil { assert.Equal(t, tc.expectedError, err) continue @@ -541,13 +541,8 @@ func TestIngesterRequests(t *testing.T) { } require.Equal(t, v, values[k]) + require.Equal(t, 2, req.Weight()) } - - /* require.Equal(t, expectedURI, req.RequestURI) */ - - // it may seem odd to test that the searchReq is not modified, but this is to prevent an issue that - // occurs if the ingesterRequest method is changed to take a searchReq pointer - require.True(t, reflect.DeepEqual(copyReq, searchReq)) } } } diff --git a/modules/frontend/slos.go b/modules/frontend/slos.go index 7ee749e60c2..e2e7c8cc918 100644 --- a/modules/frontend/slos.go +++ b/modules/frontend/slos.go @@ -13,6 +13,7 @@ import ( const ( traceByIDOp = "traces" searchOp = "search" + metadataOp = "metadata" metricsOp = "metrics" ) @@ -28,6 +29,7 @@ var ( sloTraceByIDCounter = sloQueriesPerTenant.MustCurryWith(prometheus.Labels{"op": traceByIDOp}) 
sloSearchCounter = sloQueriesPerTenant.MustCurryWith(prometheus.Labels{"op": searchOp}) + sloMetadataCounter = sloQueriesPerTenant.MustCurryWith(prometheus.Labels{"op": metadataOp}) sloMetricsCounter = sloQueriesPerTenant.MustCurryWith(prometheus.Labels{"op": metricsOp}) // be careful about adding or removing labels from this metric. this, along with the @@ -41,6 +43,7 @@ var ( traceByIDCounter = queriesPerTenant.MustCurryWith(prometheus.Labels{"op": traceByIDOp}) searchCounter = queriesPerTenant.MustCurryWith(prometheus.Labels{"op": searchOp}) + metadataCounter = queriesPerTenant.MustCurryWith(prometheus.Labels{"op": metadataOp}) metricsCounter = queriesPerTenant.MustCurryWith(prometheus.Labels{"op": metricsOp}) queryThroughput = promauto.NewHistogramVec(prometheus.HistogramOpts{ @@ -53,8 +56,9 @@ var ( NativeHistogramMinResetDuration: 1 * time.Hour, }, []string{"tenant", "op"}) - searchThroughput = queryThroughput.MustCurryWith(prometheus.Labels{"op": searchOp}) - metricsThroughput = queryThroughput.MustCurryWith(prometheus.Labels{"op": metricsOp}) + searchThroughput = queryThroughput.MustCurryWith(prometheus.Labels{"op": searchOp}) + metadataThroughput = queryThroughput.MustCurryWith(prometheus.Labels{"op": metadataOp}) + metricsThroughput = queryThroughput.MustCurryWith(prometheus.Labels{"op": metricsOp}) ) type ( @@ -70,6 +74,10 @@ func searchSLOPostHook(cfg SLOConfig) handlerPostHook { return sloHook(searchCounter, sloSearchCounter, searchThroughput, cfg) } +func metadataSLOPostHook(cfg SLOConfig) handlerPostHook { + return sloHook(metadataCounter, sloMetadataCounter, metadataThroughput, cfg) +} + func metricsSLOPostHook(cfg SLOConfig) handlerPostHook { return sloHook(metricsCounter, sloMetricsCounter, metricsThroughput, cfg) } diff --git a/modules/frontend/tag_handlers.go b/modules/frontend/tag_handlers.go index 63bbdde660c..b61bc4beb8f 100644 --- a/modules/frontend/tag_handlers.go +++ b/modules/frontend/tag_handlers.go @@ -7,12 +7,13 @@ import ( "net/http" 
"net/url" "path" + "regexp" + "strconv" "strings" "time" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/gogo/protobuf/proto" "github.com/gogo/status" "github.com/gorilla/mux" "github.com/grafana/dskit/user" @@ -24,158 +25,434 @@ import ( "google.golang.org/grpc/codes" ) +// regex patterns for tag values endpoints, precompile for performance +var ( + tagNameRegexV1 = regexp.MustCompile(`.*/api/search/tag/([^/]+)/values`) + tagNameRegexV2 = regexp.MustCompile(`.*/api/v2/search/tag/([^/]+)/values`) +) + //nolint:all //deprecated -// newTagStreamingGRPCHandler returns a handler that streams results from the HTTP handler -func newTagStreamingGRPCHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], apiPrefix string, o overrides.Interface, logger log.Logger) streamingTagsHandler { +// streaming grpc handlers + +// newTagsStreamingGRPCHandler returns a handler that streams results from the HTTP handler +func newTagsStreamingGRPCHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], apiPrefix string, o overrides.Interface, logger log.Logger) streamingTagsHandler { downstreamPath := path.Join(apiPrefix, api.PathSearchTags) + postSLOHook := metadataSLOPostHook(cfg.Search.MetadataSLO) return func(req *tempopb.SearchTagsRequest, srv tempopb.StreamingQuerier_SearchTagsServer) error { - return streamingTags(srv.Context(), cfg, next, req, downstreamPath, "", o, api.BuildSearchTagsRequest, srv.Send, combiner.NewTypedSearchTags, logTagsRequest, logTagsResult, logger) + httpReq, tenant, err := buildTagsRequestAndExtractTenant(srv.Context(), req, downstreamPath, logger) + if err != nil { + return err + } + prepareRequestForQueriers(httpReq, tenant) + + var finalResponse *tempopb.SearchTagsResponse + comb := combiner.NewTypedSearchTags(o.MaxBytesPerTagValuesQuery(tenant)) + collector := pipeline.NewGRPCCollector[*tempopb.SearchTagsResponse](next, cfg.ResponseConsumers, comb, func(res *tempopb.SearchTagsResponse) 
error { + finalResponse = res // to get the bytes processed for SLO calculations + return srv.Send(res) + }) + + start := time.Now() + logTagsRequest(logger, tenant, "SearchTagsStreaming", req.Scope, req.End-req.Start) + err = collector.RoundTrip(httpReq) + + duration := time.Since(start) + bytesProcessed := uint64(0) + if finalResponse != nil && finalResponse.Metrics != nil { + bytesProcessed = finalResponse.Metrics.InspectedBytes + } + postSLOHook(nil, tenant, bytesProcessed, duration, err) + logTagsResult(logger, tenant, "SearchTagsStreaming", req.Scope, req.End-req.Start, duration.Seconds(), bytesProcessed, err) + + return err } } -// newTagStreamingGRPCHandler returns a handler that streams results from the HTTP handler -func newTagV2StreamingGRPCHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], apiPrefix string, o overrides.Interface, logger log.Logger) streamingTagsV2Handler { +func newTagsV2StreamingGRPCHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], apiPrefix string, o overrides.Interface, logger log.Logger) streamingTagsV2Handler { downstreamPath := path.Join(apiPrefix, api.PathSearchTagsV2) + postSLOHook := metadataSLOPostHook(cfg.Search.MetadataSLO) return func(req *tempopb.SearchTagsRequest, srv tempopb.StreamingQuerier_SearchTagsV2Server) error { - return streamingTags(srv.Context(), cfg, next, req, downstreamPath, "", o, api.BuildSearchTagsRequest, srv.Send, combiner.NewTypedSearchTagsV2, logTagsRequest, logTagsResult, logger) + httpReq, tenant, err := buildTagsRequestAndExtractTenant(srv.Context(), req, downstreamPath, logger) + if err != nil { + return err + } + prepareRequestForQueriers(httpReq, tenant) + + var finalResponse *tempopb.SearchTagsV2Response + comb := combiner.NewTypedSearchTagsV2(o.MaxBytesPerTagValuesQuery(tenant)) + collector := pipeline.NewGRPCCollector[*tempopb.SearchTagsV2Response](next, cfg.ResponseConsumers, comb, func(res *tempopb.SearchTagsV2Response) error { + 
finalResponse = res // to get the bytes processed for SLO calculations + return srv.Send(res) + }) + + start := time.Now() + logTagsRequest(logger, tenant, "SearchTagsV2Streaming", req.Scope, req.End-req.Start) + err = collector.RoundTrip(httpReq) + + duration := time.Since(start) + bytesProcessed := uint64(0) + if finalResponse != nil && finalResponse.Metrics != nil { + bytesProcessed = finalResponse.Metrics.InspectedBytes + } + postSLOHook(nil, tenant, bytesProcessed, duration, err) + logTagsResult(logger, tenant, "SearchTagsV2Streaming", req.Scope, req.End-req.Start, duration.Seconds(), bytesProcessed, err) + + return err } } func newTagValuesStreamingGRPCHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], apiPrefix string, o overrides.Interface, logger log.Logger) streamingTagValuesHandler { + postSLOHook := metadataSLOPostHook(cfg.Search.MetadataSLO) + return func(req *tempopb.SearchTagValuesRequest, srv tempopb.StreamingQuerier_SearchTagValuesServer) error { // we have to interpolate the tag name into the path so that when it is routed to the queriers // they will parse it correctly. see also the mux.SetUrlVars discussion below. 
pathWithValue := strings.Replace(api.PathSearchTagValues, "{"+api.MuxVarTagName+"}", req.TagName, 1) downstreamPath := path.Join(apiPrefix, pathWithValue) - return streamingTags(srv.Context(), cfg, next, req, downstreamPath, req.TagName, o, api.BuildSearchTagValuesRequest, srv.Send, combiner.NewTypedSearchTagValues, logTagValuesRequest, logTagValuesResult, logger) + httpReq, tenant, err := buildTagValuesRequestAndExtractTenant(srv.Context(), req, downstreamPath, logger) + if err != nil { + return err + } + prepareRequestForQueriers(httpReq, tenant) + + var finalResponse *tempopb.SearchTagValuesResponse + comb := combiner.NewTypedSearchTagValues(o.MaxBytesPerTagValuesQuery(tenant)) + collector := pipeline.NewGRPCCollector[*tempopb.SearchTagValuesResponse](next, cfg.ResponseConsumers, comb, func(res *tempopb.SearchTagValuesResponse) error { + finalResponse = res // to get the bytes processed for SLO calculations + return srv.Send(res) + }) + + start := time.Now() + logTagValuesRequest(logger, tenant, "SearchTagValuesStreaming", req.TagName, req.Query, req.End-req.Start) + err = collector.RoundTrip(httpReq) + + duration := time.Since(start) + bytesProcessed := uint64(0) + if finalResponse != nil && finalResponse.Metrics != nil { + bytesProcessed = finalResponse.Metrics.InspectedBytes + } + postSLOHook(nil, tenant, bytesProcessed, duration, err) + logTagValuesResult(logger, tenant, "SearchTagValuesStreaming", req.TagName, req.Query, req.End-req.Start, duration.Seconds(), bytesProcessed, err) + + return err } } func newTagValuesV2StreamingGRPCHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], apiPrefix string, o overrides.Interface, logger log.Logger) streamingTagValuesV2Handler { + postSLOHook := metadataSLOPostHook(cfg.Search.MetadataSLO) + return func(req *tempopb.SearchTagValuesRequest, srv tempopb.StreamingQuerier_SearchTagValuesV2Server) error { // we have to interpolate the tag name into the path so that when it is routed to the 
queriers // they will parse it correctly. see also the mux.SetUrlVars discussion below. pathWithValue := strings.Replace(api.PathSearchTagValuesV2, "{"+api.MuxVarTagName+"}", req.TagName, 1) downstreamPath := path.Join(apiPrefix, pathWithValue) - return streamingTags(srv.Context(), cfg, next, req, downstreamPath, req.TagName, o, api.BuildSearchTagValuesRequest, srv.Send, combiner.NewTypedSearchTagValuesV2, logTagValuesRequest, logTagValuesResult, logger) + httpReq, tenant, err := buildTagValuesRequestAndExtractTenant(srv.Context(), req, downstreamPath, logger) + if err != nil { + return err + } + prepareRequestForQueriers(httpReq, tenant) + + var finalResponse *tempopb.SearchTagValuesV2Response + comb := combiner.NewTypedSearchTagValuesV2(o.MaxBytesPerTagValuesQuery(tenant)) + collector := pipeline.NewGRPCCollector[*tempopb.SearchTagValuesV2Response](next, cfg.ResponseConsumers, comb, func(res *tempopb.SearchTagValuesV2Response) error { + finalResponse = res // to get the bytes processed for SLO calculations + return srv.Send(res) + }) + + start := time.Now() + logTagValuesRequest(logger, tenant, "SearchTagValuesV2Streaming", req.TagName, req.Query, req.End-req.Start) + err = collector.RoundTrip(httpReq) + + duration := time.Since(start) + bytesProcessed := uint64(0) + if finalResponse != nil && finalResponse.Metrics != nil { + bytesProcessed = finalResponse.Metrics.InspectedBytes + } + postSLOHook(nil, tenant, bytesProcessed, duration, err) + logTagValuesResult(logger, tenant, "SearchTagValuesV2Streaming", req.TagName, req.Query, req.End-req.Start, duration.Seconds(), bytesProcessed, err) + + return err + } +} + +// HTTP Handlers +func newTagsHTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], o overrides.Interface, logger log.Logger) http.RoundTripper { + postSLOHook := metadataSLOPostHook(cfg.Search.MetadataSLO) + + return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + // if error is not nil, return error 
Response but suppress the error + tenant, errResp, err := extractTenantWithErrorResp(req, logger) + if err != nil { + return errResp, nil + } + + scope, _, rangeDur := parseParams(req) + // build and use round tripper + comb := combiner.NewTypedSearchTags(o.MaxBytesPerTagValuesQuery(tenant)) + rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, comb) + start := time.Now() + logTagsRequest(logger, tenant, "SearchTags", scope, rangeDur) + + resp, err := rt.RoundTrip(req) + + // ask for the typed diff and use that for the SLO hook. it will have up-to-date metrics + var bytesProcessed uint64 + searchResp, _ := comb.GRPCDiff() + if searchResp != nil && searchResp.Metrics != nil { + bytesProcessed = searchResp.Metrics.InspectedBytes + } + + duration := time.Since(start) + postSLOHook(resp, tenant, bytesProcessed, duration, err) + logTagsResult(logger, tenant, "SearchTags", scope, rangeDur, duration.Seconds(), bytesProcessed, err) + + return resp, err + }) +} + +func newTagsV2HTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], o overrides.Interface, logger log.Logger) http.RoundTripper { + postSLOHook := metadataSLOPostHook(cfg.Search.MetadataSLO) + + return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + // if error is not nil, return error Response but suppress the error + tenant, errResp, err := extractTenantWithErrorResp(req, logger) + if err != nil { + return errResp, nil + } + + scope, _, rangeDur := parseParams(req) + // build and use round tripper + comb := combiner.NewTypedSearchTagsV2(o.MaxBytesPerTagValuesQuery(tenant)) + rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, comb) + start := time.Now() + logTagsRequest(logger, tenant, "SearchTagsV2", scope, rangeDur) + + resp, err := rt.RoundTrip(req) + + // ask for the typed diff and use that for the SLO hook. 
it will have up-to-date metrics + var bytesProcessed uint64 + searchResp, _ := comb.GRPCDiff() + if searchResp != nil && searchResp.Metrics != nil { + bytesProcessed = searchResp.Metrics.InspectedBytes + } + + duration := time.Since(start) + postSLOHook(resp, tenant, bytesProcessed, duration, err) + logTagsResult(logger, tenant, "SearchTagsV2", scope, rangeDur, duration.Seconds(), bytesProcessed, err) + + return resp, err + }) +} + +func newTagValuesHTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], o overrides.Interface, logger log.Logger) http.RoundTripper { + postSLOHook := metadataSLOPostHook(cfg.Search.MetadataSLO) + + return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + // if error is not nil, return error Response but suppress the error + tenant, errResp, err := extractTenantWithErrorResp(req, logger) + if err != nil { + return errResp, nil + } + + _, query, rangeDur := parseParams(req) + tagName := extractTagName(req.URL.Path, tagNameRegexV1) + + // build and use round tripper + comb := combiner.NewTypedSearchTagValues(o.MaxBytesPerTagValuesQuery(tenant)) + rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, comb) + start := time.Now() + logTagValuesRequest(logger, tenant, "SearchTagValues", tagName, query, rangeDur) + + resp, err := rt.RoundTrip(req) + + // ask for the typed diff and use that for the SLO hook. 
it will have up-to-date metrics + var bytesProcessed uint64 + searchResp, _ := comb.GRPCDiff() + if searchResp != nil && searchResp.Metrics != nil { + bytesProcessed = searchResp.Metrics.InspectedBytes + } + + duration := time.Since(start) + postSLOHook(resp, tenant, bytesProcessed, duration, err) + logTagValuesResult(logger, tenant, "SearchTagValues", tagName, query, rangeDur, duration.Seconds(), bytesProcessed, err) + + return resp, err + }) +} + +func newTagValuesV2HTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], o overrides.Interface, logger log.Logger) http.RoundTripper { + postSLOHook := metadataSLOPostHook(cfg.Search.MetadataSLO) + + return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + // if error is not nil, return error Response but suppress the error + tenant, errResp, err := extractTenantWithErrorResp(req, logger) + if err != nil { + return errResp, nil + } + + _, query, rangeDur := parseParams(req) + tagName := extractTagName(req.URL.Path, tagNameRegexV2) + + // build and use round tripper + comb := combiner.NewTypedSearchTagValuesV2(o.MaxBytesPerTagValuesQuery(tenant)) + rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, comb) + start := time.Now() + logTagValuesRequest(logger, tenant, "SearchTagValuesV2", tagName, query, rangeDur) + + resp, err := rt.RoundTrip(req) + + // ask for the typed diff and use that for the SLO hook. 
it will have up-to-date metrics + var bytesProcessed uint64 + searchResp, _ := comb.GRPCDiff() + if searchResp != nil && searchResp.Metrics != nil { + bytesProcessed = searchResp.Metrics.InspectedBytes + } + + duration := time.Since(start) + postSLOHook(resp, tenant, bytesProcessed, duration, err) + logTagValuesResult(logger, tenant, "SearchTagValuesV2", tagName, query, rangeDur, duration.Seconds(), bytesProcessed, err) + + return resp, err + }) +} + +// helpers +func extractTenantWithErrorResp(req *http.Request, logger log.Logger) (string, *http.Response, error) { + tenant, err := user.ExtractOrgID(req.Context()) + if err != nil { + level.Error(logger).Log("msg", "tags failed to extract orgid", "err", err) + return "", &http.Response{ + StatusCode: http.StatusBadRequest, + Status: http.StatusText(http.StatusBadRequest), + Body: io.NopCloser(strings.NewReader(err.Error())), + }, err } + return tenant, nil, err } -// streamingTags abstracts the boilerplate for streaming tags and tag values -func streamingTags[TReq proto.Message, TResp proto.Message](ctx context.Context, - cfg Config, - next pipeline.AsyncRoundTripper[combiner.PipelineResponse], - req TReq, - downstreamPath string, - tagName string, - o overrides.Interface, - fnBuild func(*http.Request, TReq) (*http.Request, error), - fnSend func(TResp) error, - fnCombiner func(int) combiner.GRPCCombiner[TResp], - logRequest func(log.Logger, string, TReq), - logResult func(log.Logger, string, float64, TReq, error), - logger log.Logger, -) error { - httpReq, err := fnBuild(&http.Request{ - URL: &url.URL{ - Path: downstreamPath, - }, +func buildTagsRequestAndExtractTenant(ctx context.Context, req *tempopb.SearchTagsRequest, downstreamPath string, logger log.Logger) (*http.Request, string, error) { + httpReq, err := api.BuildSearchTagsRequest(&http.Request{ + URL: &url.URL{Path: downstreamPath}, Header: http.Header{}, Body: io.NopCloser(bytes.NewReader([]byte{})), }, req) if err != nil { - level.Error(logger).Log("msg", 
"search tags: build tags request failed", "err", err) - return status.Errorf(codes.InvalidArgument, "build tags request failed: %s", err.Error()) + _ = level.Error(logger).Log("msg", "search tags: build tags request failed", "err", err) + return nil, "", status.Errorf(codes.InvalidArgument, "build tags request failed: %s", err.Error()) } - httpReq = httpReq.WithContext(ctx) - if tagName != "" { - // the functions that parse an http request in the api package expect the tagName - // to be parsed out of the path so we're injecting it here. this is a hack and - // could be removed if the pipeline were swapped to be a proto.Message pipeline instead of - // an *http.Request pipeline. - httpReq = mux.SetURLVars(httpReq, map[string]string{api.MuxVarTagName: tagName}) - } tenant, err := user.ExtractOrgID(ctx) if err != nil { - level.Error(logger).Log("msg", "search tags: ", "err", err) - return status.Error(codes.InvalidArgument, err.Error()) + _ = level.Error(logger).Log("msg", "search tags: ", "err", err) + return nil, "", status.Error(codes.InvalidArgument, err.Error()) } - prepareRequestForQueriers(httpReq, tenant) - - c := fnCombiner(o.MaxBytesPerTagValuesQuery(tenant)) - collector := pipeline.NewGRPCCollector[TResp](next, cfg.ResponseConsumers, c, fnSend) + return httpReq, tenant, nil +} - start := time.Now() - logRequest(logger, tenant, req) - err = collector.RoundTrip(httpReq) - logResult(logger, tenant, time.Since(start).Seconds(), req, err) +func buildTagValuesRequestAndExtractTenant(ctx context.Context, req *tempopb.SearchTagValuesRequest, downstreamPath string, logger log.Logger) (*http.Request, string, error) { + httpReq, err := api.BuildSearchTagValuesRequest(&http.Request{ + URL: &url.URL{Path: downstreamPath}, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader([]byte{})), + }, req) + if err != nil { + _ = level.Error(logger).Log("msg", "search tag values: build tags values request failed", "err", err) + return nil, "", 
status.Errorf(codes.InvalidArgument, "build tag values request failed: %s", err.Error()) + } + httpReq = httpReq.WithContext(ctx) - return err -} + // the functions that parse a http request in the api package expect the tagName + // to be parsed out of the path so we're injecting it here. this is a hack and + // could be removed if the pipeline were swapped to be a proto.Message pipeline instead of + // an *http.Request pipeline. + httpReq = mux.SetURLVars(httpReq, map[string]string{api.MuxVarTagName: req.TagName}) -// newTagHTTPHandler returns a handler that returns a single response from the HTTP handler -func newTagHTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], o overrides.Interface, fnCombiner func(int) combiner.Combiner, logger log.Logger) http.RoundTripper { - return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { - tenant, err := user.ExtractOrgID(req.Context()) - if err != nil { - level.Error(logger).Log("msg", "tags failed to extract orgid", "err", err) - return &http.Response{ - StatusCode: http.StatusBadRequest, - Status: http.StatusText(http.StatusBadRequest), - Body: io.NopCloser(strings.NewReader(err.Error())), - }, nil - } + tenant, err := user.ExtractOrgID(ctx) + if err != nil { + _ = level.Error(logger).Log("msg", "search tag values: ", "err", err) + return nil, "", status.Error(codes.InvalidArgument, err.Error()) + } - // build and use roundtripper - combiner := fnCombiner(o.MaxBytesPerTagValuesQuery(tenant)) - rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, combiner) + return httpReq, tenant, nil +} - return rt.RoundTrip(req) - }) +func logTagsRequest(logger log.Logger, tenantID, handler, scope string, rangeSeconds uint32) { + level.Info(logger).Log( + "msg", "search tag request", + "tenant", tenantID, + "handler", handler, + "scope", scope, + "range_seconds", rangeSeconds) } -func logTagsResult(logger log.Logger, tenantID string, durationSeconds float64, req 
*tempopb.SearchTagsRequest, err error) { +func logTagsResult(logger log.Logger, tenantID, handler, scope string, rangeSeconds uint32, durationSeconds float64, inspectedBytes uint64, err error) { level.Info(logger).Log( "msg", "search tag results", "tenant", tenantID, - "scope", req.Scope, - "range_seconds", req.End-req.Start, + "handler", handler, + "scope", scope, + "range_seconds", rangeSeconds, "duration_seconds", durationSeconds, + "inspected_bytes", inspectedBytes, + "request_throughput", float64(inspectedBytes)/durationSeconds, "error", err) } -func logTagsRequest(logger log.Logger, tenantID string, req *tempopb.SearchTagsRequest) { +func logTagValuesRequest(logger log.Logger, tenantID, handler, tagName, query string, rangeSeconds uint32) { level.Info(logger).Log( - "msg", "search tag request", + "msg", "search tag values request", "tenant", tenantID, - "scope", req.Scope, - "range_seconds", req.End-req.Start) + "handler", handler, + "tag", tagName, + "query", query, + "range_seconds", rangeSeconds) } -func logTagValuesResult(logger log.Logger, tenantID string, durationSeconds float64, req *tempopb.SearchTagValuesRequest, err error) { +func logTagValuesResult(logger log.Logger, tenantID, handler, tagName, query string, rangeSeconds uint32, durationSeconds float64, inspectedBytes uint64, err error) { level.Info(logger).Log( - "msg", "search tag results", + "msg", "search tag values results", "tenant", tenantID, - "tag", req.TagName, - "query", req.Query, - "range_seconds", req.End-req.Start, + "handler", handler, + "tag", tagName, + "query", query, + "range_seconds", rangeSeconds, "duration_seconds", durationSeconds, + "inspected_bytes", inspectedBytes, + "request_throughput", float64(inspectedBytes)/durationSeconds, "error", err) } -func logTagValuesRequest(logger log.Logger, tenantID string, req *tempopb.SearchTagValuesRequest) { - level.Info(logger).Log( - "msg", "search tag request", - "tenant", tenantID, - "tag", req.TagName, - "query", req.Query, - 
"range_seconds", req.End-req.Start) +// parseParams parses optional 'start', 'end', 'scope', and 'q' params from a http.Request +// returns scope, query and duration (end - start). returns "", and 0 if these params are invalid or absent +func parseParams(req *http.Request) (string, string, uint32) { + query := req.URL.Query() + + scope := query.Get("scope") + q := query.Get("q") + // ignore errors, we default to 0 as params are not always present. + start, _ := strconv.ParseInt(query.Get("start"), 10, 64) + end, _ := strconv.ParseInt(query.Get("end"), 10, 64) + + var duration int64 + // duration only makes sense if start and end are present and end is greater than start + if start > 0 && end > 0 && end > start { + duration = end - start + } + return scope, q, uint32(duration) +} + +// extractTagName extracts the tagName based on the provided regex pattern +func extractTagName(path string, pattern *regexp.Regexp) string { + matches := pattern.FindStringSubmatch(path) + if len(matches) > 1 { + return matches[1] + } + return "" } diff --git a/modules/frontend/tag_handlers_test.go b/modules/frontend/tag_handlers_test.go index 08b64bac08d..23f46147f83 100644 --- a/modules/frontend/tag_handlers_test.go +++ b/modules/frontend/tag_handlers_test.go @@ -8,6 +8,8 @@ import ( "io" "net/http" "net/http/httptest" + "net/url" + "regexp" "testing" "time" @@ -512,6 +514,7 @@ func TestSearchTagsV2AccessesCache(t *testing.T) { Tags: []string{"blarg", "blerg"}, }, }, + Metrics: &tempopb.MetadataMetrics{}, } overwriteString, err := (&jsonpb.Marshaler{}).MarshalToString(overwriteResp) require.NoError(t, err) @@ -532,3 +535,190 @@ func TestSearchTagsV2AccessesCache(t *testing.T) { require.Equal(t, overwriteResp, actualResp) } + +func TestParseParams(t *testing.T) { + tests := []struct { + name string + queryParams map[string]string + expectedScope string + expectedQ string + expectedDuration uint32 + }{ + { + name: "all params present", + queryParams: map[string]string{"start": 
"1723667082", "end": "1723839882", "scope": "resource", "q": "some_query"}, + expectedScope: "resource", + expectedQ: "some_query", + expectedDuration: 172800, + }, + { + name: "missing start", + queryParams: map[string]string{"end": "1723839882", "scope": "resource"}, + expectedScope: "resource", + expectedQ: "", + expectedDuration: 0, + }, + { + name: "missing end", + queryParams: map[string]string{"start": "1723667082", "scope": "resource"}, + expectedScope: "resource", + expectedQ: "", + expectedDuration: 0, + }, + { + name: "missing scope", + queryParams: map[string]string{"start": "1723667082", "end": "1723839882"}, + expectedScope: "", + expectedQ: "", + expectedDuration: 172800, + }, + { + name: "missing q", + queryParams: map[string]string{"start": "1723667082", "end": "1723839882", "scope": "resource"}, + expectedScope: "resource", + expectedQ: "", + expectedDuration: 172800, + }, + { + name: "invalid start", + queryParams: map[string]string{"start": "invalid", "end": "1723839882", "scope": "resource"}, + expectedScope: "resource", + expectedQ: "", + expectedDuration: 0, + }, + { + name: "invalid end", + queryParams: map[string]string{"start": "1723667082", "end": "invalid", "scope": "resource"}, + expectedScope: "resource", + expectedQ: "", + expectedDuration: 0, + }, + { + name: "no params", + queryParams: map[string]string{}, + expectedScope: "", + expectedQ: "", + expectedDuration: 0, + }, + { + name: "negative start and end", + queryParams: map[string]string{"start": "-1000", "end": "-2000", "scope": "negative_case"}, + expectedScope: "negative_case", + expectedQ: "", + expectedDuration: 0, + }, + { + name: "end less than start", + queryParams: map[string]string{"start": "1723839882", "end": "1723667082", "scope": "resource"}, + expectedScope: "resource", + expectedQ: "", + expectedDuration: 0, + }, + { + name: "start and end are the same", + queryParams: map[string]string{"start": "1723839882", "end": "1723839882", "scope": "zero_duration"}, + 
expectedScope: "zero_duration", + expectedQ: "", + expectedDuration: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + u := &url.URL{Path: "/my/test/path"} + query := u.Query() + for key, value := range tt.queryParams { + query.Add(key, value) + } + u.RawQuery = query.Encode() + req := &http.Request{URL: u} + + scope, q, duration := parseParams(req) + + require.Equal(t, tt.expectedScope, scope) + require.Equal(t, tt.expectedQ, q) + require.Equal(t, tt.expectedDuration, duration) + }) + } +} + +func TestExtractTagName(t *testing.T) { + // Define the base of our test cases table + var testCases []struct { + name string + urlPath string + pattern *regexp.Regexp + expected string + } + + prefixes := []string{ + "/tempo", + "/otherprefix", + "", // No prefix + } + tagNames := []string{ + ".X-Ab-TraceID", + ".__name__", + ".action", + ".app", + ".application_id", + "span.name", + "hello", + "name", + "$tag_name", + "\u00E9:tag\\escaped_tag", + } + patterns := []struct { + name string + regex *regexp.Regexp + suffix string + }{ + {"WithoutV2", tagNameRegexV1, "/api/search/tag/"}, + {"WithV2", tagNameRegexV2, "/api/v2/search/tag/"}, + } + + // build test cases + for _, prefix := range prefixes { + for _, tagName := range tagNames { + for _, pattern := range patterns { + // Construct the full path + fullPath := prefix + pattern.suffix + tagName + "/values" + + // Add the test case to the array + testCases = append(testCases, struct { + name string + urlPath string + pattern *regexp.Regexp + expected string + }{ + name: "Prefix: " + prefix + ", Tag: " + tagName + ", Pattern: " + pattern.name, + urlPath: fullPath, + pattern: pattern.regex, + expected: tagName, + }) + } + } + } + + // Additional edge cases + edgeCases := []struct { + name string + urlPath string + pattern *regexp.Regexp + expected string + }{ + {"Missing tag name V1", "/api/search/tag//values", tagNameRegexV1, ""}, + {"Missing tag name V2", "/api/v2/search/tag//values", 
tagNameRegexV2, ""}, + {"Non-matching path V1", "/some/other/path/without/tag/values", tagNameRegexV1, ""}, + {"Non-matching path V2", "/different/path/without/tag/values", tagNameRegexV2, ""}, + } + testCases = append(testCases, edgeCases...) + + // Run all test cases + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + extractedTagName := extractTagName(tt.urlPath, tt.pattern) + require.Equal(t, tt.expected, extractedTagName) + }) + } +} diff --git a/modules/frontend/tag_sharder.go b/modules/frontend/tag_sharder.go index 8328ad2b58d..b3862ba889d 100644 --- a/modules/frontend/tag_sharder.go +++ b/modules/frontend/tag_sharder.go @@ -213,14 +213,14 @@ func (s searchTagSharder) RoundTrip(pipelineRequest pipeline.Request) (pipeline. // build request to search ingester based on query_ingesters_until config and time range // pass subCtx in requests, so we can cancel and exit early - ingesterReq, err := s.ingesterRequest(ctx, tenantID, r, searchReq) + ingesterReq, err := s.ingesterRequest(ctx, tenantID, pipelineRequest, searchReq) if err != nil { return nil, err } reqCh := make(chan pipeline.Request, 1) // buffer of 1 allows us to insert ingestReq if it exists if ingesterReq != nil { - reqCh <- pipeline.NewHTTPRequest(ingesterReq) + reqCh <- ingesterReq } s.backendRequests(ctx, tenantID, r, searchReq, reqCh, func(err error) { @@ -228,6 +228,9 @@ func (s searchTagSharder) RoundTrip(pipelineRequest pipeline.Request) (pipeline. s.logger.Log("msg", "failed to build backend requests", "err", err) }) + // TODO(suraj): send jobMetricsResponse like we send in asyncSearchSharder.RoundTrip and accumulate these metrics in the + // combiners, and log these metrics in the logger like we do in search_handlers.go + // execute requests return pipeline.NewAsyncSharderChan(ctx, s.cfg.ConcurrentRequests, reqCh, nil, s.next), nil } @@ -318,7 +321,7 @@ func (s searchTagSharder) buildBackendRequests(ctx context.Context, tenantID str // that covers the ingesters. 
If nil is returned for the http.Request then there is no ingesters query. // we should do a copy of the searchReq before use this function, as it is an interface, we cannot guaranteed be passed // by value. -func (s searchTagSharder) ingesterRequest(ctx context.Context, tenantID string, parent *http.Request, searchReq tagSearchReq) (*http.Request, error) { +func (s searchTagSharder) ingesterRequest(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tagSearchReq) (*pipeline.HTTPRequest, error) { // request without start or end, search only in ingester if searchReq.start() == 0 || searchReq.end() == 0 { return s.buildIngesterRequest(ctx, tenantID, parent, searchReq) @@ -349,14 +352,14 @@ func (s searchTagSharder) ingesterRequest(ctx context.Context, tenantID string, return s.buildIngesterRequest(ctx, tenantID, parent, newSearchReq) } -func (s searchTagSharder) buildIngesterRequest(ctx context.Context, tenantID string, parent *http.Request, searchReq tagSearchReq) (*http.Request, error) { - subR := parent.Clone(ctx) +func (s searchTagSharder) buildIngesterRequest(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tagSearchReq) (*pipeline.HTTPRequest, error) { + subR := parent.HTTPRequest().Clone(ctx) subR, err := searchReq.buildSearchTagRequest(subR) if err != nil { return nil, err } prepareRequestForQueriers(subR, tenantID) - return subR, nil + return parent.CloneFromHTTPRequest(subR), nil } // maxDuration returns the max search duration allowed for this tenant. 
diff --git a/modules/frontend/tag_sharder_test.go b/modules/frontend/tag_sharder_test.go index dfc1460c000..150cd5e0dc2 100644 --- a/modules/frontend/tag_sharder_test.go +++ b/modules/frontend/tag_sharder_test.go @@ -257,6 +257,7 @@ func TestTagsIngesterRequest(t *testing.T) { } req := httptest.NewRequest("GET", tc.request, nil) + pipelineReq := pipeline.NewHTTPRequest(req) searchReq := fakeReq{ startValue: uint32(tc.start), @@ -264,7 +265,7 @@ func TestTagsIngesterRequest(t *testing.T) { } copyReq := searchReq - actualReq, err := s.ingesterRequest(context.Background(), "test", req, &searchReq) + actualReq, err := s.ingesterRequest(context.Background(), "test", pipelineReq, &searchReq) if tc.expectedError != nil { assert.Equal(t, tc.expectedError, err) continue @@ -273,7 +274,7 @@ func TestTagsIngesterRequest(t *testing.T) { if tc.expectedURI == "" { assert.Nil(t, actualReq) } else { - assert.Equal(t, tc.expectedURI, actualReq.RequestURI) + assert.Equal(t, tc.expectedURI, actualReq.HTTPRequest().RequestURI) } // it may seem odd to test that the searchReq is not modified, but this is to prevent an issue that diff --git a/modules/frontend/traceid_handlers.go b/modules/frontend/traceid_handlers.go index 33404d1ec20..34a0564d01b 100644 --- a/modules/frontend/traceid_handlers.go +++ b/modules/frontend/traceid_handlers.go @@ -16,7 +16,7 @@ import ( ) // newTraceIDHandler creates a http.handler for trace by id requests -func newTraceIDHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], o overrides.Interface, combiner func(int, string) combiner.Combiner, logger log.Logger) http.RoundTripper { +func newTraceIDHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.PipelineResponse], o overrides.Interface, combinerFn func(int, string) combiner.Combiner, logger log.Logger) http.RoundTripper { postSLOHook := traceByIDSLOPostHook(cfg.TraceByID.SLO) return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { @@ -64,8 +64,8 @@ func 
newTraceIDHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.Pipe "tenant", tenant, "path", req.URL.Path) - combiner := combiner(o.MaxBytesPerTrace(tenant), marshallingFormat) - rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, combiner) + comb := combinerFn(o.MaxBytesPerTrace(tenant), marshallingFormat) + rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, comb) start := time.Now() resp, err := rt.RoundTrip(req) diff --git a/modules/frontend/traceid_handlers_test.go b/modules/frontend/traceid_handlers_test.go index c2fc4371750..69e16abe036 100644 --- a/modules/frontend/traceid_handlers_test.go +++ b/modules/frontend/traceid_handlers_test.go @@ -14,6 +14,7 @@ import ( "github.com/gogo/protobuf/proto" "github.com/gorilla/mux" "github.com/grafana/dskit/user" + "github.com/grafana/tempo/modules/frontend/pipeline" "github.com/grafana/tempo/pkg/model/trace" "github.com/grafana/tempo/pkg/tempopb" "github.com/grafana/tempo/pkg/util/test" @@ -159,11 +160,11 @@ func TestTraceIDHandler(t *testing.T) { for _, tc := range tests { tc := tc // copy the test case to prevent race on the loop variable t.Run(tc.name, func(t *testing.T) { - next := RoundTripperFunc(func(r *http.Request) (*http.Response, error) { + next := pipeline.RoundTripperFunc(func(r pipeline.Request) (*http.Response, error) { var testTrace *tempopb.Trace var statusCode int var err error - if r.RequestURI == "/querier/api/traces/1234?mode=ingesters" { + if r.HTTPRequest().RequestURI == "/querier/api/traces/1234?mode=ingesters" { testTrace = tc.trace1 statusCode = tc.status1 err = tc.err1 @@ -236,7 +237,7 @@ func TestTraceIDHandler(t *testing.T) { } func TestTraceIDHandlerForJSONResponse(t *testing.T) { - next := RoundTripperFunc(func(_ *http.Request) (*http.Response, error) { + next := pipeline.RoundTripperFunc(func(_ pipeline.Request) (*http.Response, error) { testTrace := test.MakeTrace(2, []byte{0x01, 0x02}) resBytes, _ := proto.Marshal(&tempopb.TraceByIDResponse{ Trace: 
testTrace, @@ -354,11 +355,11 @@ func TestTraceIDHandlerV2(t *testing.T) { for _, tc := range tests { tc := tc // copy the test case to prevent race on the loop variable t.Run(tc.name, func(t *testing.T) { - next := RoundTripperFunc(func(r *http.Request) (*http.Response, error) { + next := pipeline.RoundTripperFunc(func(r pipeline.Request) (*http.Response, error) { + var err error var testTrace *tempopb.Trace var statusCode int - var err error - if r.RequestURI == "/querier/api/v2/traces/1234?mode=ingesters" { + if r.HTTPRequest().RequestURI == "/querier/api/v2/traces/1234?mode=ingesters" { testTrace = tc.trace1 statusCode = tc.status1 err = tc.err1 @@ -447,7 +448,7 @@ func TestTraceIDHandlerV2WithJSONResponse(t *testing.T) { } } - next := RoundTripperFunc(func(_ *http.Request) (*http.Response, error) { + next := pipeline.RoundTripperFunc(func(_ pipeline.Request) (*http.Response, error) { var err error resBytes, err := proto.Marshal(&tempopb.TraceByIDResponse{ Trace: splitTrace, diff --git a/modules/frontend/traceid_sharder.go b/modules/frontend/traceid_sharder.go index 458b4bd7c31..21bdfa47d24 100644 --- a/modules/frontend/traceid_sharder.go +++ b/modules/frontend/traceid_sharder.go @@ -66,7 +66,8 @@ func (s asyncTraceSharder) RoundTrip(pipelineRequest pipeline.Request) (pipeline } return pipeline.NewAsyncSharderFunc(ctx, int(concurrentShards), len(reqs), func(i int) pipeline.Request { - return pipeline.NewHTTPRequest(reqs[i]) + pipelineReq := pipelineRequest.CloneFromHTTPRequest(reqs[i]) + return pipelineReq }, s.next), nil } diff --git a/modules/frontend/transport/roundtripper.go b/modules/frontend/transport/roundtripper.go deleted file mode 100644 index c8fe2db1489..00000000000 --- a/modules/frontend/transport/roundtripper.go +++ /dev/null @@ -1,56 +0,0 @@ -package transport - -import ( - "bytes" - "context" - "io" - "net/http" - - "github.com/grafana/dskit/httpgrpc" -) - -// GrpcRoundTripper is similar to http.RoundTripper, but works with HTTP requests 
converted to protobuf messages. -type GrpcRoundTripper interface { - RoundTripGRPC(context.Context, *httpgrpc.HTTPRequest) (*httpgrpc.HTTPResponse, error) -} - -func AdaptGrpcRoundTripperToHTTPRoundTripper(r GrpcRoundTripper) http.RoundTripper { - return &grpcRoundTripperAdapter{roundTripper: r} -} - -// This adapter wraps GrpcRoundTripper and converted it into http.RoundTripper -type grpcRoundTripperAdapter struct { - roundTripper GrpcRoundTripper -} - -type buffer struct { - buff []byte - io.ReadCloser -} - -func (b *buffer) Bytes() []byte { - return b.buff -} - -func (a *grpcRoundTripperAdapter) RoundTrip(r *http.Request) (*http.Response, error) { - req, err := httpgrpc.FromHTTPRequest(r) - if err != nil { - return nil, err - } - - resp, err := a.roundTripper.RoundTripGRPC(r.Context(), req) - if err != nil { - return nil, err - } - - httpResp := &http.Response{ - StatusCode: int(resp.Code), - Body: &buffer{buff: resp.Body, ReadCloser: io.NopCloser(bytes.NewReader(resp.Body))}, - Header: http.Header{}, - ContentLength: int64(len(resp.Body)), - } - for _, h := range resp.Headers { - httpResp.Header[h.Key] = h.Values - } - return httpResp, nil -} diff --git a/modules/frontend/v1/frontend.go b/modules/frontend/v1/frontend.go index 7f67bd486ea..7a83d95d6c8 100644 --- a/modules/frontend/v1/frontend.go +++ b/modules/frontend/v1/frontend.go @@ -5,6 +5,7 @@ import ( "errors" "flag" "fmt" + "net/http" "sync/atomic" "time" @@ -17,10 +18,10 @@ import ( "github.com/grafana/dskit/httpgrpc" "github.com/grafana/dskit/services" "github.com/grafana/dskit/tenant" - "github.com/grafana/tempo/pkg/util/httpgrpcutil" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/grafana/tempo/modules/frontend/pipeline" "github.com/grafana/tempo/modules/frontend/queue" "github.com/grafana/tempo/modules/frontend/v1/frontendv1pb" "github.com/grafana/tempo/pkg/util" @@ -69,11 +70,18 @@ type Frontend struct { type request struct { 
enqueueTime time.Time queueSpan trace.Span - originalCtx context.Context - request *httpgrpc.HTTPRequest + request pipeline.Request err chan error - response chan *httpgrpc.HTTPResponse + response chan *http.Response +} + +func (r *request) Weight() int { + return r.request.Weight() +} + +func (r *request) OriginalContext() context.Context { + return r.request.Context() } // New creates a new frontend. Frontend implements service, and must be started and stopped. @@ -163,23 +171,19 @@ func (f *Frontend) cleanupInactiveUserMetrics(user string) { f.discardedRequests.DeleteLabelValues(user) } -// RoundTripGRPC round trips a proto (instead of a HTTP request). -func (f *Frontend) RoundTripGRPC(ctx context.Context, req *httpgrpc.HTTPRequest) (*httpgrpc.HTTPResponse, error) { - // Propagate trace context in gRPC too - this will be ignored if using HTTP. - carrier := (*httpgrpcutil.HttpgrpcHeadersCarrier)(req) - otel.GetTextMapPropagator().Inject(ctx, carrier) - +// RoundTrip a HTTP request +func (f *Frontend) RoundTrip(req pipeline.Request) (*http.Response, error) { request := request{ - request: req, - originalCtx: ctx, + request: req, // Buffer of 1 to ensure response can be written by the server side // of the Process stream, even if this goroutine goes away due to // client context cancellation. err: make(chan error, 1), - response: make(chan *httpgrpc.HTTPResponse, 1), + response: make(chan *http.Response, 1), } + ctx := req.Context() if err := f.queueRequest(ctx, &request); err != nil { return nil, err } @@ -229,11 +233,14 @@ func (f *Frontend) Process(server frontendv1pb.Frontend_ProcessServer) error { req.queueSpan.End() // only add if not expired - if req.originalCtx.Err() != nil { + if req.OriginalContext().Err() != nil { continue } - reqBatch.add(req) + err = reqBatch.add(req) + if err != nil { + return fmt.Errorf("unexpected error adding request to batch: %w", err) + } } // if all requests are expired then continue requesting jobs for this user. 
this nicely diff --git a/modules/frontend/v1/request_batch.go b/modules/frontend/v1/request_batch.go index 6aae745a20b..e67e820322b 100644 --- a/modules/frontend/v1/request_batch.go +++ b/modules/frontend/v1/request_batch.go @@ -1,10 +1,16 @@ package v1 import ( + "bytes" "fmt" + "io" + "net/http" "github.com/grafana/dskit/httpgrpc" "github.com/grafana/dskit/multierror" + "github.com/grafana/tempo/pkg/util/httpgrpcutil" + + "go.opentelemetry.io/otel" ) type requestBatch struct { @@ -14,14 +20,35 @@ type requestBatch struct { wireRequests []*httpgrpc.HTTPRequest } +type buffer struct { + buff []byte + io.ReadCloser +} + +func (b *buffer) Bytes() []byte { + return b.buff +} + func (b *requestBatch) clear() { b.pipelineRequests = b.pipelineRequests[:0] b.wireRequests = b.wireRequests[:0] } -func (b *requestBatch) add(r *request) { +func (b *requestBatch) add(r *request) error { b.pipelineRequests = append(b.pipelineRequests, r) - b.wireRequests = append(b.wireRequests, r.request) + + req, err := httpgrpc.FromHTTPRequest(r.request.HTTPRequest()) + if err != nil { + return err + } + + // Propagate trace context in gRPC too - this will be ignored if using HTTP. + carrier := (*httpgrpcutil.HttpgrpcHeadersCarrier)(req) + otel.GetTextMapPropagator().Inject(r.OriginalContext(), carrier) + + b.wireRequests = append(b.wireRequests, req) + + return nil } func (b *requestBatch) httpGrpcRequests() []*httpgrpc.HTTPRequest { @@ -36,7 +63,7 @@ func (b *requestBatch) contextError() error { multiErr := multierror.New() for _, r := range b.pipelineRequests { - if err := r.originalCtx.Err(); err != nil { + if err := r.OriginalContext().Err(); err != nil { multiErr.Add(err) } } @@ -52,7 +79,7 @@ func (b *requestBatch) contextError() error { // will belong to the same upstream http query. 
func (b *requestBatch) doneChan(stop <-chan struct{}) <-chan struct{} { if len(b.pipelineRequests) == 1 { - return b.pipelineRequests[0].originalCtx.Done() + return b.pipelineRequests[0].OriginalContext().Done() } done := make(chan struct{}) @@ -63,7 +90,7 @@ func (b *requestBatch) doneChan(stop <-chan struct{}) <-chan struct{} { // if all are done. for _, r := range b.pipelineRequests { select { - case <-r.originalCtx.Done(): + case <-r.OriginalContext().Done(): case <-stop: return } @@ -87,8 +114,23 @@ func (b *requestBatch) reportResultsToPipeline(responses []*httpgrpc.HTTPRespons } for i, r := range b.pipelineRequests { - r.response <- responses[i] + r.response <- httpGRPCResponseToHTTPResponse(responses[i]) } return nil } + +func httpGRPCResponseToHTTPResponse(resp *httpgrpc.HTTPResponse) *http.Response { + // translate back + httpResp := &http.Response{ + StatusCode: int(resp.Code), + Body: &buffer{buff: resp.Body, ReadCloser: io.NopCloser(bytes.NewReader(resp.Body))}, + Header: http.Header{}, + ContentLength: int64(len(resp.Body)), + } + for _, h := range resp.Headers { + httpResp.Header[h.Key] = h.Values + } + + return httpResp +} diff --git a/modules/frontend/v1/request_batch_test.go b/modules/frontend/v1/request_batch_test.go index ebbbc6e53bf..931abae26c3 100644 --- a/modules/frontend/v1/request_batch_test.go +++ b/modules/frontend/v1/request_batch_test.go @@ -1,12 +1,17 @@ package v1 import ( + "bytes" "context" "errors" + "net/http" + "net/http/httptest" "sync" "testing" "github.com/grafana/dskit/httpgrpc" + "github.com/grafana/tempo/modules/frontend/pipeline" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -16,10 +21,9 @@ func TestRequestBatchBasics(t *testing.T) { const totalRequests = 3 for i := byte(0); i < totalRequests; i++ { - rb.add(&request{ - request: &httpgrpc.HTTPRequest{ - Body: []byte{i}, - }, + req := httptest.NewRequest("GET", "http://example.com", bytes.NewReader([]byte{i})) + _ = rb.add(&request{ + 
request: pipeline.NewHTTPRequest(req), }) } @@ -43,21 +47,23 @@ func TestRequestBatchBasics(t *testing.T) { func TestRequestBatchContextError(t *testing.T) { rb := &requestBatch{} - + ctx := context.Background() const totalRequests = 3 - ctx := context.Background() + req := httptest.NewRequest("GET", "http://example.com", nil) + prequest := pipeline.NewHTTPRequest(req) + prequest.WithContext(ctx) + for i := 0; i < totalRequests-1; i++ { - rb.add(&request{ - originalCtx: ctx, - }) + _ = rb.add(&request{request: prequest}) } // add a cancel context cancelCtx, cancel := context.WithCancel(ctx) - rb.add(&request{ - originalCtx: cancelCtx, - }) + prequest = pipeline.NewHTTPRequest(req) + prequest.WithContext(cancelCtx) + + _ = rb.add(&request{request: prequest}) // confirm ok require.NoError(t, rb.contextError()) @@ -74,10 +80,13 @@ func TestDoneChanCloses(_ *testing.T) { ctx := context.Background() cancelCtx, cancel := context.WithCancel(ctx) + + req := httptest.NewRequest("GET", "http://example.com", nil) + prequest := pipeline.NewHTTPRequest(req) + prequest.WithContext(cancelCtx) + for i := 0; i < totalRequests-1; i++ { - rb.add(&request{ - originalCtx: cancelCtx, - }) + _ = rb.add(&request{request: prequest}) } wg := &sync.WaitGroup{} @@ -97,11 +106,11 @@ func TestDoneChanClosesOnStop(_ *testing.T) { rb := &requestBatch{} const totalRequests = 3 + req := httptest.NewRequest("GET", "http://example.com", nil) - ctx := context.Background() for i := 0; i < totalRequests-1; i++ { - rb.add(&request{ - originalCtx: ctx, + _ = rb.add(&request{ + request: pipeline.NewHTTPRequest(req), }) } @@ -134,9 +143,10 @@ func TestErrorsPropagateUpstream(t *testing.T) { require.ErrorContains(t, err, "foo") wg.Done() }() - - rb.add(&request{ - err: errChan, + req := httptest.NewRequest("GET", "http://example.com", nil) + _ = rb.add(&request{ + request: pipeline.NewHTTPRequest(req), + err: errChan, }) } @@ -152,16 +162,18 @@ func TestResponsesPropagateUpstream(t *testing.T) { wg := 
&sync.WaitGroup{} for i := int32(0); i < totalRequests; i++ { - responseChan := make(chan *httpgrpc.HTTPResponse) + responseChan := make(chan *http.Response) wg.Add(1) go func(expectedCode int32) { resp := <-responseChan - require.Equal(t, expectedCode, resp.Code) + assert.Equal(t, expectedCode, int32(resp.StatusCode)) wg.Done() }(i) - rb.add(&request{ + req := httptest.NewRequest("GET", "http://example.com", nil) + _ = rb.add(&request{ + request: pipeline.NewHTTPRequest(req), response: responseChan, }) } diff --git a/modules/generator/generator_test.go b/modules/generator/generator_test.go index 3600d1c817b..dcfafaca163 100644 --- a/modules/generator/generator_test.go +++ b/modules/generator/generator_test.go @@ -2,16 +2,24 @@ package generator import ( "context" + "flag" "fmt" "os" "path/filepath" + "runtime" + "strconv" "testing" "time" "github.com/go-kit/log" "github.com/grafana/dskit/services" "github.com/grafana/tempo/modules/generator/processor/spanmetrics" + "github.com/grafana/tempo/modules/generator/storage" "github.com/grafana/tempo/modules/overrides" + "github.com/grafana/tempo/pkg/tempopb" + common_v1 "github.com/grafana/tempo/pkg/tempopb/common/v1" + trace_v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1" + "github.com/grafana/tempo/pkg/util/test" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" @@ -129,3 +137,140 @@ func (l testLogger) Log(keyvals ...interface{}) error { l.t.Log(keyvals...) 
return nil } + +func BenchmarkPushSpans(b *testing.B) { + var ( + tenant = "test-tenant" + reg = prometheus.NewRegistry() + ctx = context.Background() + log = log.NewNopLogger() + cfg = &Config{} + + walcfg = &storage.Config{ + Path: b.TempDir(), + } + + o = &mockOverrides{ + processors: map[string]struct{}{ + "span-metrics": {}, + "service-graphs": {}, + }, + spanMetricsEnableTargetInfo: true, + spanMetricsTargetInfoExcludedDimensions: []string{"excluded}"}, + } + ) + + cfg.RegisterFlagsAndApplyDefaults("", &flag.FlagSet{}) + + wal, err := storage.New(walcfg, o, tenant, reg, log) + require.NoError(b, err) + + inst, err := newInstance(cfg, tenant, o, wal, reg, log, nil, nil) + require.NoError(b, err) + defer inst.shutdown() + + req := &tempopb.PushSpansRequest{ + Batches: []*trace_v1.ResourceSpans{ + test.MakeBatch(100, nil), + test.MakeBatch(100, nil), + test.MakeBatch(100, nil), + test.MakeBatch(100, nil), + }, + } + + // Add more resource attributes to get closer to real data + // Add integer to increase cardinality. 
+ // Currently this is about 80 active series + // TODO - Get more series + for i, b := range req.Batches { + b.Resource.Attributes = append(b.Resource.Attributes, []*common_v1.KeyValue{ + {Key: "k8s.cluster.name", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "k8s.namespace.name", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "k8s.node.name", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "k8s.pod.ip", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "k8s.pod.name", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "excluded", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + }...) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + inst.pushSpans(ctx, req) + } + + b.StopTimer() + runtime.GC() + mem := runtime.MemStats{} + runtime.ReadMemStats(&mem) + b.ReportMetric(float64(mem.HeapInuse), "heap_in_use") +} + +func BenchmarkCollect(b *testing.B) { + var ( + tenant = "test-tenant" + reg = prometheus.NewRegistry() + ctx = context.Background() + log = log.NewNopLogger() + cfg = &Config{} + + walcfg = &storage.Config{ + Path: b.TempDir(), + } + + o = &mockOverrides{ + processors: map[string]struct{}{ + "span-metrics": {}, + "service-graphs": {}, + }, + spanMetricsDimensions: []string{"k8s.cluster.name", "k8s.namespace.name"}, + spanMetricsEnableTargetInfo: true, + spanMetricsTargetInfoExcludedDimensions: []string{"excluded}"}, + // nativeHistograms: overrides.HistogramMethodBoth, + } + ) + + cfg.RegisterFlagsAndApplyDefaults("", &flag.FlagSet{}) + + wal, err := storage.New(walcfg, o, tenant, reg, log) + require.NoError(b, err) + + inst, err := newInstance(cfg, tenant, o, 
wal, reg, log, nil, nil) + require.NoError(b, err) + defer inst.shutdown() + + req := &tempopb.PushSpansRequest{ + Batches: []*trace_v1.ResourceSpans{ + test.MakeBatch(100, nil), + test.MakeBatch(100, nil), + test.MakeBatch(100, nil), + test.MakeBatch(100, nil), + }, + } + + // Add more resource attributes to get closer to real data + // Add integer to increase cardinality. + // Currently this is about 80 active series + // TODO - Get more series + for i, b := range req.Batches { + b.Resource.Attributes = append(b.Resource.Attributes, []*common_v1.KeyValue{ + {Key: "k8s.cluster.name", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "k8s.namespace.name", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "k8s.node.name", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "k8s.pod.ip", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "k8s.pod.name", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + {Key: "excluded", Value: &common_v1.AnyValue{Value: &common_v1.AnyValue_StringValue{StringValue: "test" + strconv.Itoa(i)}}}, + }...) 
+ } + inst.pushSpans(ctx, req) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + inst.registry.CollectMetrics(ctx) + } + + b.StopTimer() + runtime.GC() + mem := runtime.MemStats{} + runtime.ReadMemStats(&mem) + b.ReportMetric(float64(mem.HeapInuse), "heap_in_use") +} diff --git a/modules/generator/instance.go b/modules/generator/instance.go index fec65a4e945..3056a63bf65 100644 --- a/modules/generator/instance.go +++ b/modules/generator/instance.go @@ -357,6 +357,8 @@ func (i *instance) pushSpans(ctx context.Context, req *tempopb.PushSpansRequest) } func (i *instance) preprocessSpans(req *tempopb.PushSpansRequest) { + // TODO - uniqify all strings? + // Doesn't help allocs, but should greatly reduce inuse space size := 0 spanCount := 0 expiredSpanCount := 0 diff --git a/modules/generator/processor/localblocks/processor_test.go b/modules/generator/processor/localblocks/processor_test.go index fdae5936258..fcd90aa35c4 100644 --- a/modules/generator/processor/localblocks/processor_test.go +++ b/modules/generator/processor/localblocks/processor_test.go @@ -336,15 +336,15 @@ func (m *mockBlock) Search(context.Context, *tempopb.SearchRequest, common.Searc return nil, nil } -func (m *mockBlock) SearchTags(context.Context, traceql.AttributeScope, common.TagsCallback, common.SearchOptions) error { +func (m *mockBlock) SearchTags(context.Context, traceql.AttributeScope, common.TagsCallback, common.MetricsCallback, common.SearchOptions) error { return nil } -func (m *mockBlock) SearchTagValues(context.Context, string, common.TagValuesCallback, common.SearchOptions) error { +func (m *mockBlock) SearchTagValues(context.Context, string, common.TagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return nil } -func (m *mockBlock) SearchTagValuesV2(context.Context, traceql.Attribute, common.TagValuesCallbackV2, common.SearchOptions) error { +func (m *mockBlock) SearchTagValuesV2(context.Context, traceql.Attribute, common.TagValuesCallbackV2, 
common.MetricsCallback, common.SearchOptions) error { return nil } @@ -352,11 +352,11 @@ func (m *mockBlock) Fetch(context.Context, traceql.FetchSpansRequest, common.Sea return traceql.FetchSpansResponse{}, nil } -func (m *mockBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.SearchOptions) error { +func (m *mockBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return nil } -func (m *mockBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.SearchOptions) error { +func (m *mockBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.MetricsCallback, common.SearchOptions) error { return nil } diff --git a/modules/generator/processor/servicegraphs/servicegraphs.go b/modules/generator/processor/servicegraphs/servicegraphs.go index ae71937f478..40c937f1102 100644 --- a/modules/generator/processor/servicegraphs/servicegraphs.go +++ b/modules/generator/processor/servicegraphs/servicegraphs.go @@ -184,7 +184,7 @@ func (p *Processor) consume(resourceSpans []*v1_trace.ResourceSpans) (err error) for _, ils := range rs.ScopeSpans { for _, span := range ils.Spans { connectionType := store.Unknown - spanMultiplier := processor_util.GetSpanMultiplier(p.Cfg.SpanMultiplierKey, span) + spanMultiplier := processor_util.GetSpanMultiplier(p.Cfg.SpanMultiplierKey, span, rs.Resource) switch span.Kind { case v1_trace.Span_SPAN_KIND_PRODUCER: // override connection type and continue processing as span kind client diff --git a/modules/generator/processor/spanmetrics/spanmetrics.go b/modules/generator/processor/spanmetrics/spanmetrics.go index 0cb2cf19151..492edabd7c5 100644 --- a/modules/generator/processor/spanmetrics/spanmetrics.go +++ b/modules/generator/processor/spanmetrics/spanmetrics.go @@ -121,8 +121,8 @@ func (p *Processor) 
aggregateMetrics(resourceSpans []*v1_trace.ResourceSpans) { svcName, _ := processor_util.FindServiceName(rs.Resource.Attributes) jobName := processor_util.GetJobValue(rs.Resource.Attributes) instanceID, _ := processor_util.FindInstanceID(rs.Resource.Attributes) - resourceLabels := make([]string, 0) - resourceValues := make([]string, 0) + resourceLabels := make([]string, 0) // TODO move outside the loop and reuse? + resourceValues := make([]string, 0) // TODO don't allocate unless needed? if p.Cfg.EnableTargetInfo { resourceLabels, resourceValues = processor_util.GetTargetInfoAttributesValues(rs.Resource.Attributes, p.Cfg.TargetInfoExcludedDimensions) @@ -201,7 +201,7 @@ func (p *Processor) aggregateMetricsForSpan(svcName string, jobName string, inst labelValues = append(labelValues, instanceID) } - spanMultiplier := processor_util.GetSpanMultiplier(p.Cfg.SpanMultiplierKey, span) + spanMultiplier := processor_util.GetSpanMultiplier(p.Cfg.SpanMultiplierKey, span, rs) registryLabelValues := p.registry.NewLabelValueCombo(labels, labelValues) @@ -219,6 +219,9 @@ func (p *Processor) aggregateMetricsForSpan(svcName string, jobName string, inst // update target_info label values if p.Cfg.EnableTargetInfo { + // TODO - The resource labels only need to be sanitized once + // TODO - attribute names are stable across applications + // so let's cache the result of previous sanitizations resourceAttributesCount := len(targetInfoLabels) for index, label := range targetInfoLabels { // sanitize label name @@ -239,6 +242,7 @@ func (p *Processor) aggregateMetricsForSpan(svcName string, jobName string, inst targetInfoRegistryLabelValues := p.registry.NewLabelValueCombo(targetInfoLabels, targetInfoLabelValues) // only register target info if at least (job or instance) AND one other attribute are present + // TODO - We can move this check to the top if resourceAttributesCount > 0 && len(targetInfoLabels) > resourceAttributesCount { 
p.spanMetricsTargetInfo.SetForTargetInfo(targetInfoRegistryLabelValues, 1) } diff --git a/modules/generator/processor/util/util.go b/modules/generator/processor/util/util.go index 9f39a16e83c..796872b526a 100644 --- a/modules/generator/processor/util/util.go +++ b/modules/generator/processor/util/util.go @@ -4,6 +4,7 @@ import ( semconv "go.opentelemetry.io/otel/semconv/v1.25.0" v1_common "github.com/grafana/tempo/pkg/tempopb/common/v1" + v1_resource "github.com/grafana/tempo/pkg/tempopb/resource/v1" v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1" tempo_util "github.com/grafana/tempo/pkg/util" ) @@ -31,19 +32,26 @@ func FindAttributeValue(key string, attributes ...[]*v1_common.KeyValue) (string return "", false } -func GetSpanMultiplier(ratioKey string, span *v1.Span) float64 { - spanMultiplier := 1.0 +func GetSpanMultiplier(ratioKey string, span *v1.Span, rs *v1_resource.Resource) float64 { if ratioKey != "" { for _, kv := range span.Attributes { if kv.Key == ratioKey { v := kv.Value.GetDoubleValue() if v > 0 { - spanMultiplier = 1.0 / v + return 1.0 / v + } + } + } + for _, kv := range rs.Attributes { + if kv.Key == ratioKey { + v := kv.Value.GetDoubleValue() + if v > 0 { + return 1.0 / v } } } } - return spanMultiplier + return 1.0 } func GetJobValue(attributes []*v1_common.KeyValue) string { @@ -61,6 +69,7 @@ func GetJobValue(attributes []*v1_common.KeyValue) string { } func GetTargetInfoAttributesValues(attributes []*v1_common.KeyValue, exclude []string) ([]string, []string) { + // TODO allocate with known length, or take new params for existing buffers keys := make([]string, 0) values := make([]string, 0) for _, attrs := range attributes { diff --git a/modules/generator/registry/config.go b/modules/generator/registry/config.go index b264b31ee89..721c7481a06 100644 --- a/modules/generator/registry/config.go +++ b/modules/generator/registry/config.go @@ -10,8 +10,8 @@ type Config struct { // Defaults to 15s. 
CollectionInterval time.Duration `yaml:"collection_interval"` - // StaleDuration controls how quickly series become stale and are deleted from the registry. An active - // series is deleted if it hasn't been updated more stale duration. + // StaleDuration controls how quickly series become stale and are deleted from the registry. + // An active series is deleted if it hasn't been updated for more than the stale duration. // Defaults to 15m. StaleDuration time.Duration `yaml:"stale_duration"` diff --git a/modules/generator/registry/counter.go b/modules/generator/registry/counter.go index bea9152a41d..8f7f5af1a09 100644 --- a/modules/generator/registry/counter.go +++ b/modules/generator/registry/counter.go @@ -9,8 +9,6 @@ import ( "go.uber.org/atomic" ) -var _ metric = (*counter)(nil) - type counter struct { //nolint unused metric @@ -40,8 +38,6 @@ var ( _ metric = (*counter)(nil) ) -const insertOffsetDuration = 1 * time.Second - func (co *counterSeries) isNew() bool { return co.firstSeries.Load() } @@ -144,9 +140,6 @@ func (c *counter) collectMetrics(appender storage.Appender, timeMs int64, extern lb := labels.NewBuilder(baseLabels) for _, s := range c.series { - t := time.UnixMilli(timeMs) - - // reset labels for every series lb.Reset(baseLabels) // set series-specific labels @@ -158,16 +151,17 @@ func (c *counter) collectMetrics(appender storage.Appender, timeMs int64, extern // to first insert a 0 value to allow Prometheus to start from a non-null // value. if s.isNew() { - _, err = appender.Append(0, lb.Labels(), timeMs, 0) + // We set the timestamp of the initial series at the end of the previous minute; that way we ensure it ends up in a + // different aggregation interval to avoid being downsampled. + endOfLastMinuteMs := getEndOfLastMinuteMs(timeMs) + _, err = appender.Append(0, lb.Labels(), endOfLastMinuteMs, 0) if err != nil { return } - // Increment timeMs to ensure that the next value is not at the same time. 
- t = t.Add(insertOffsetDuration) s.registerSeenSeries() } - _, err = appender.Append(0, lb.Labels(), t.UnixMilli(), s.value.Load()) + _, err = appender.Append(0, lb.Labels(), timeMs, s.value.Load()) if err != nil { return } diff --git a/modules/generator/registry/counter_test.go b/modules/generator/registry/counter_test.go index d4ce706e52d..a9d6b2f6bc0 100644 --- a/modules/generator/registry/counter_test.go +++ b/modules/generator/registry/counter_test.go @@ -13,7 +13,7 @@ import ( func Test_counter(t *testing.T) { var seriesAdded int - onAdd := func(count uint32) bool { + onAdd := func(_ uint32) bool { seriesAdded++ return true } @@ -26,12 +26,12 @@ func Test_counter(t *testing.T) { assert.Equal(t, 2, seriesAdded) collectionTimeMs := time.Now().UnixMilli() - offsetCollectionTimeMs := time.UnixMilli(collectionTimeMs).Add(insertOffsetDuration).UnixMilli() + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, offsetCollectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, offsetCollectionTimeMs, 2), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, 1), + newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, collectionTimeMs, 2), } collectMetricAndAssert(t, c, collectionTimeMs, nil, 2, expectedSamples, nil) @@ -41,12 +41,12 @@ func Test_counter(t *testing.T) { assert.Equal(t, 3, seriesAdded) collectionTimeMs = time.Now().UnixMilli() - offsetCollectionTimeMs = 
time.UnixMilli(collectionTimeMs).Add(insertOffsetDuration).UnixMilli() + endOfLastMinuteMs = getEndOfLastMinuteMs(collectionTimeMs) expectedSamples = []sample{ newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, collectionTimeMs, 4), - newSample(map[string]string{"__name__": "my_counter", "label": "value-3"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-3"}, offsetCollectionTimeMs, 3), + newSample(map[string]string{"__name__": "my_counter", "label": "value-3"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-3"}, collectionTimeMs, 3), } collectMetricAndAssert(t, c, collectionTimeMs, nil, 3, expectedSamples, nil) @@ -54,7 +54,7 @@ func Test_counter(t *testing.T) { func TestCounterDifferentLabels(t *testing.T) { var seriesAdded int - onAdd := func(count uint32) bool { + onAdd := func(_ uint32) bool { seriesAdded++ return true } @@ -67,12 +67,12 @@ func TestCounterDifferentLabels(t *testing.T) { assert.Equal(t, 2, seriesAdded) collectionTimeMs := time.Now().UnixMilli() - offsetCollectionTimeMs := time.UnixMilli(collectionTimeMs).Add(insertOffsetDuration).UnixMilli() + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, offsetCollectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_counter", "another_label": "another_value"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "another_label": "another_value"}, offsetCollectionTimeMs, 2), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, 
collectionTimeMs, 1), + newSample(map[string]string{"__name__": "my_counter", "another_label": "another_value"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "another_label": "another_value"}, collectionTimeMs, 2), } collectMetricAndAssert(t, c, collectionTimeMs, nil, 2, expectedSamples, nil) } @@ -93,12 +93,12 @@ func Test_counter_cantAdd(t *testing.T) { c.Inc(newLabelValueCombo([]string{"label"}, []string{"value-2"}), 2.0) collectionTimeMs := time.Now().UnixMilli() - offsetCollectionTimeMs := time.UnixMilli(collectionTimeMs).Add(insertOffsetDuration).UnixMilli() + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, offsetCollectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, offsetCollectionTimeMs, 2), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, 1), + newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, collectionTimeMs, 2), } collectMetricAndAssert(t, c, collectionTimeMs, nil, 2, expectedSamples, nil) @@ -134,12 +134,12 @@ func Test_counter_removeStaleSeries(t *testing.T) { assert.Equal(t, 0, removedSeries) collectionTimeMs := time.Now().UnixMilli() - offsetCollectionTimeMs := time.UnixMilli(collectionTimeMs).Add(insertOffsetDuration).UnixMilli() + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, 
collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, offsetCollectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, offsetCollectionTimeMs, 2), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, 1), + newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-2"}, collectionTimeMs, 2), } collectMetricAndAssert(t, c, collectionTimeMs, nil, 2, expectedSamples, nil) @@ -167,12 +167,12 @@ func Test_counter_externalLabels(t *testing.T) { c.Inc(newLabelValueCombo([]string{"label"}, []string{"value-2"}), 2.0) collectionTimeMs := time.Now().UnixMilli() - offsetCollectionTimeMs := time.UnixMilli(collectionTimeMs).Add(insertOffsetDuration).UnixMilli() + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_counter", "label": "value-1", "external_label": "external_value"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-1", "external_label": "external_value"}, offsetCollectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_counter", "label": "value-2", "external_label": "external_value"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-2", "external_label": "external_value"}, offsetCollectionTimeMs, 2), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1", "external_label": "external_value"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1", "external_label": "external_value"}, 
collectionTimeMs, 1), + newSample(map[string]string{"__name__": "my_counter", "label": "value-2", "external_label": "external_value"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-2", "external_label": "external_value"}, collectionTimeMs, 2), } collectMetricAndAssert(t, c, collectionTimeMs, map[string]string{"external_label": "external_value"}, 2, expectedSamples, nil) } @@ -253,10 +253,10 @@ func Test_counter_concurrencyCorrectness(t *testing.T) { wg.Wait() collectionTimeMs := time.Now().UnixMilli() - offsetCollectionTimeMs := time.UnixMilli(collectionTimeMs).Add(insertOffsetDuration).UnixMilli() + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, 0), - newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, offsetCollectionTimeMs, totalCount.Load()), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, endOfLastMinuteMs, 0), + newSample(map[string]string{"__name__": "my_counter", "label": "value-1"}, collectionTimeMs, totalCount.Load()), } collectMetricAndAssert(t, c, collectionTimeMs, nil, 1, expectedSamples, nil) } diff --git a/modules/generator/registry/gauge_test.go b/modules/generator/registry/gauge_test.go index 7f506f46f33..f5124f91564 100644 --- a/modules/generator/registry/gauge_test.go +++ b/modules/generator/registry/gauge_test.go @@ -12,7 +12,7 @@ import ( func Test_gaugeInc(t *testing.T) { var seriesAdded int - onAdd := func(count uint32) bool { + onAdd := func(_ uint32) bool { seriesAdded++ return true } @@ -47,7 +47,7 @@ func Test_gaugeInc(t *testing.T) { func TestGaugeDifferentLabels(t *testing.T) { var seriesAdded int - onAdd := func(count uint32) bool { + onAdd := func(_ uint32) bool { seriesAdded++ return true } @@ -69,7 +69,7 @@ func TestGaugeDifferentLabels(t *testing.T) { func Test_gaugeSet(t *testing.T) { var 
seriesAdded int - onAdd := func(count uint32) bool { + onAdd := func(_ uint32) bool { seriesAdded++ return true } diff --git a/modules/generator/registry/histogram.go b/modules/generator/registry/histogram.go index ed1e64c4f14..0b51ecaa429 100644 --- a/modules/generator/registry/histogram.go +++ b/modules/generator/registry/histogram.go @@ -17,12 +17,13 @@ import ( var _ metric = (*histogram)(nil) type histogram struct { - metricName string - nameCount string - nameSum string - nameBucket string - buckets []float64 - bucketLabels []string + metricName string + nameCount string + nameSum string + nameBucket string + buckets []float64 + bucketLabels []string + externalLabels map[string]string seriesMtx sync.Mutex series map[uint64]*histogramSeries @@ -34,10 +35,12 @@ type histogram struct { } type histogramSeries struct { - // labelValueCombo should not be modified after creation - labels LabelPair - count *atomic.Float64 - sum *atomic.Float64 + countLabels labels.Labels + sumLabels labels.Labels + bucketLabels []labels.Labels + + count *atomic.Float64 + sum *atomic.Float64 // buckets includes the +Inf bucket buckets []*atomic.Float64 // exemplar is stored as a single traceID @@ -64,7 +67,7 @@ var ( _ metric = (*histogram)(nil) ) -func newHistogram(name string, buckets []float64, onAddSeries func(uint32) bool, onRemoveSeries func(count uint32), traceIDLabelName string) *histogram { +func newHistogram(name string, buckets []float64, onAddSeries func(uint32) bool, onRemoveSeries func(count uint32), traceIDLabelName string, externalLabels map[string]string) *histogram { if onAddSeries == nil { onAddSeries = func(uint32) bool { return true @@ -97,6 +100,7 @@ func newHistogram(name string, buckets []float64, onAddSeries func(uint32) bool, onAddSerie: onAddSeries, onRemoveSerie: onRemoveSeries, traceIDLabelName: traceIDLabelName, + externalLabels: externalLabels, } } @@ -121,13 +125,13 @@ func (h *histogram) ObserveWithExemplar(labelValueCombo *LabelValueCombo, value func 
(h *histogram) newSeries(labelValueCombo *LabelValueCombo, value float64, traceID string, multiplier float64) *histogramSeries { newSeries := &histogramSeries{ - labels: labelValueCombo.getLabelPair(), - count: atomic.NewFloat64(0), - sum: atomic.NewFloat64(0), - buckets: nil, - exemplars: nil, - lastUpdated: atomic.NewInt64(0), - firstSeries: atomic.NewBool(true), + count: atomic.NewFloat64(0), + sum: atomic.NewFloat64(0), + buckets: make([]*atomic.Float64, 0, len(h.buckets)), + exemplars: make([]*atomic.String, 0, len(h.buckets)), + exemplarValues: make([]*atomic.Float64, 0, len(h.buckets)), + lastUpdated: atomic.NewInt64(0), + firstSeries: atomic.NewBool(true), } for i := 0; i < len(h.buckets); i++ { newSeries.buckets = append(newSeries.buckets, atomic.NewFloat64(0)) @@ -135,6 +139,33 @@ func (h *histogram) newSeries(labelValueCombo *LabelValueCombo, value float64, t newSeries.exemplarValues = append(newSeries.exemplarValues, atomic.NewFloat64(0)) } + // Precompute all labels for all sub-metrics upfront + + // Create and populate label builder + lbls := labelValueCombo.getLabelPair() + lb := labels.NewBuilder(make(labels.Labels, 1+len(lbls.names))) + for i, name := range lbls.names { + lb.Set(name, lbls.values[i]) + } + for name, value := range h.externalLabels { + lb.Set(name, value) + } + + // _count + lb.Set(labels.MetricName, h.nameCount) + newSeries.countLabels = lb.Labels() + + // _sum + lb.Set(labels.MetricName, h.nameSum) + newSeries.sumLabels = lb.Labels() + + // _bucket + lb.Set(labels.MetricName, h.nameBucket) + for _, b := range h.bucketLabels { + lb.Set(labels.BucketLabel, b) + newSeries.bucketLabels = append(newSeries.bucketLabels, lb.Labels()) + } + h.updateSeries(newSeries, value, traceID, multiplier) return newSeries @@ -161,37 +192,20 @@ func (h *histogram) name() string { return h.metricName } -func (h *histogram) collectMetrics(appender storage.Appender, timeMs int64, externalLabels map[string]string) (activeSeries int, err error) { +func (h 
*histogram) collectMetrics(appender storage.Appender, timeMs int64, _ map[string]string) (activeSeries int, err error) { h.seriesMtx.Lock() defer h.seriesMtx.Unlock() - t := timeMs - activeSeries = len(h.series) * int(h.activeSeriesPerHistogramSerie()) - labelsCount := 0 - if activeSeries > 0 && h.series[0] != nil { - labelsCount = len(h.series[0].labels.names) - } - lbls := make(labels.Labels, 1+len(externalLabels)+labelsCount) - lb := labels.NewBuilder(lbls) - - // set external labels - for name, value := range externalLabels { - lb.Set(name, value) - } - for _, s := range h.series { - // set series-specific labels - for i, name := range s.labels.names { - lb.Set(name, s.labels.values[i]) - } - // If we are about to call Append for the first time on a series, // we need to first insert a 0 value to allow Prometheus to start from a non-null value. if s.isNew() { - lb.Set(labels.MetricName, h.nameCount) - _, err = appender.Append(0, lb.Labels(), t-1, 0) // t-1 to ensure that the next value is not at the same time + // We set the timestamp of the initial series at the end of the previous minute; that way we ensure it ends up in a + // different aggregation interval to avoid being downsampled. 
+ endOfLastMinuteMs := getEndOfLastMinuteMs(timeMs) + _, err = appender.Append(0, s.countLabels, endOfLastMinuteMs, 0) if err != nil { return } @@ -199,38 +213,33 @@ func (h *histogram) collectMetrics(appender storage.Appender, timeMs int64, exte } // sum - lb.Set(labels.MetricName, h.nameSum) - _, err = appender.Append(0, lb.Labels(), t, s.sum.Load()) + _, err = appender.Append(0, s.sumLabels, timeMs, s.sum.Load()) if err != nil { return } // count - lb.Set(labels.MetricName, h.nameCount) - _, err = appender.Append(0, lb.Labels(), t, s.count.Load()) + _, err = appender.Append(0, s.countLabels, timeMs, s.count.Load()) if err != nil { return } // bucket - lb.Set(labels.MetricName, h.nameBucket) - - for i, bucketLabel := range h.bucketLabels { - lb.Set(labels.BucketLabel, bucketLabel) - ref, err := appender.Append(0, lb.Labels(), t, s.buckets[i].Load()) + for i := range h.bucketLabels { + ref, err := appender.Append(0, s.bucketLabels[i], timeMs, s.buckets[i].Load()) if err != nil { return activeSeries, err } ex := s.exemplars[i].Load() if ex != "" { - _, err = appender.AppendExemplar(ref, lb.Labels(), exemplar.Exemplar{ + _, err = appender.AppendExemplar(ref, s.bucketLabels[i], exemplar.Exemplar{ Labels: []labels.Label{{ Name: h.traceIDLabelName, Value: ex, }}, Value: s.exemplarValues[i].Load(), - Ts: t, + Ts: timeMs, }) if err != nil { return activeSeries, err @@ -239,8 +248,6 @@ func (h *histogram) collectMetrics(appender storage.Appender, timeMs int64, exte // clear the exemplar so we don't emit it again s.exemplars[i].Store("") } - - lb.Del(labels.BucketLabel) } return diff --git a/modules/generator/registry/histogram_test.go b/modules/generator/registry/histogram_test.go index 731ee539234..8cf214af5d0 100644 --- a/modules/generator/registry/histogram_test.go +++ b/modules/generator/registry/histogram_test.go @@ -14,12 +14,12 @@ import ( func Test_histogram(t *testing.T) { var seriesAdded int - onAdd := func(count uint32) bool { + onAdd := func(_ uint32) bool { 
seriesAdded++ return true } - h := newHistogram("my_histogram", []float64{1.0, 2.0}, onAdd, nil, "trace_id") + h := newHistogram("my_histogram", []float64{1.0, 2.0}, onAdd, nil, "trace_id", nil) h.ObserveWithExemplar(newLabelValueCombo([]string{"label"}, []string{"value-1"}), 1.0, "trace-1", 1.0) h.ObserveWithExemplar(newLabelValueCombo([]string{"label"}, []string{"value-2"}), 1.5, "trace-2", 1.0) @@ -27,15 +27,16 @@ func Test_histogram(t *testing.T) { assert.Equal(t, 2, seriesAdded) collectionTimeMs := time.Now().UnixMilli() - collectionTimeWithOffsetMs := collectionTimeMs - 1 + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) + expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), // Zero entry for value-1 series + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), // Zero entry for value-1 series newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "+Inf"}, collectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, collectionTimeWithOffsetMs, 0), // Zero entry for value-2 series + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, endOfLastMinuteMs, 0), // Zero entry for value-2 series newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": 
"value-2"}, collectionTimeMs, 1.5), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), @@ -62,7 +63,7 @@ func Test_histogram(t *testing.T) { assert.Equal(t, 3, seriesAdded) collectionTimeMs = time.Now().UnixMilli() - collectionTimeWithOffsetMs = collectionTimeMs - 1 + endOfLastMinuteMs = getEndOfLastMinuteMs(collectionTimeMs) expectedSamples = []sample{ newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-1"}, collectionTimeMs, 1), @@ -74,7 +75,7 @@ func Test_histogram(t *testing.T) { newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "+Inf"}, collectionTimeMs, 2), - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-3"}, collectionTimeWithOffsetMs, 0), // Zero entry for value-3 series + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-3"}, endOfLastMinuteMs, 0), // Zero entry for value-3 series newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-3"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-3"}, collectionTimeMs, 3), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-3", "le": "1"}, collectionTimeMs, 0), @@ -146,7 +147,7 @@ func Test_histogram_cantAdd(t *testing.T) { return canAdd } - h := newHistogram("my_histogram", []float64{1.0, 2.0}, onAdd, nil, "") + h := newHistogram("my_histogram", []float64{1.0, 2.0}, onAdd, nil, "", nil) // allow adding new series canAdd = true @@ -155,15 +156,15 @@ func Test_histogram_cantAdd(t *testing.T) { 
h.ObserveWithExemplar(newLabelValueCombo([]string{"label"}, []string{"value-2"}), 1.5, "", 1.0) collectionTimeMs := time.Now().UnixMilli() - collectionTimeWithOffsetMs := collectionTimeMs - 1 + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "+Inf"}, collectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, collectionTimeWithOffsetMs, 0), + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, endOfLastMinuteMs, 0), newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-2"}, collectionTimeMs, 1.5), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), @@ -201,7 +202,7 @@ func Test_histogram_removeStaleSeries(t *testing.T) { removedSeries++ } - h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, onRemove, "") + h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, onRemove, "", nil) timeMs := time.Now().UnixMilli() h.ObserveWithExemplar(newLabelValueCombo([]string{"label"}, []string{"value-1"}), 1.0, "", 1.0) 
@@ -212,15 +213,15 @@ func Test_histogram_removeStaleSeries(t *testing.T) { assert.Equal(t, 0, removedSeries) collectionTimeMs := time.Now().UnixMilli() - collectionTimeWithOffsetMs := collectionTimeMs - 1 + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "+Inf"}, collectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, collectionTimeWithOffsetMs, 0), + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, endOfLastMinuteMs, 0), newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-2"}, collectionTimeMs, 1.5), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), @@ -251,32 +252,34 @@ func Test_histogram_removeStaleSeries(t *testing.T) { } func Test_histogram_externalLabels(t *testing.T) { - h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, nil, "") + extLabels := map[string]string{"external_label": "external_value"} + + h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, nil, "", extLabels) 
h.ObserveWithExemplar(newLabelValueCombo([]string{"label"}, []string{"value-1"}), 1.0, "", 1.0) h.ObserveWithExemplar(newLabelValueCombo([]string{"label"}, []string{"value-2"}), 1.5, "", 1.0) collectionTimeMs := time.Now().UnixMilli() - collectionTimeWithOffsetMs := collectionTimeMs - 1 + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1", "external_label": "external_value"}, collectionTimeWithOffsetMs, 0), + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1", "external_label": "external_value"}, endOfLastMinuteMs, 0), newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1", "external_label": "external_value"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-1", "external_label": "external_value"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "1", "external_label": "external_value"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "2", "external_label": "external_value"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "+Inf", "external_label": "external_value"}, collectionTimeMs, 1), - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2", "external_label": "external_value"}, collectionTimeWithOffsetMs, 0), + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2", "external_label": "external_value"}, endOfLastMinuteMs, 0), newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-2", "external_label": "external_value"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-2", "external_label": "external_value"}, collectionTimeMs, 1.5), 
newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "1", "external_label": "external_value"}, collectionTimeMs, 0), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "2", "external_label": "external_value"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-2", "le": "+Inf", "external_label": "external_value"}, collectionTimeMs, 1), } - collectMetricAndAssert(t, h, collectionTimeMs, map[string]string{"external_label": "external_value"}, 10, expectedSamples, nil) + collectMetricAndAssert(t, h, collectionTimeMs, extLabels, 10, expectedSamples, nil) } func Test_histogram_concurrencyDataRace(t *testing.T) { - h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, nil, "") + h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, nil, "", nil) end := make(chan struct{}) @@ -322,7 +325,7 @@ func Test_histogram_concurrencyDataRace(t *testing.T) { } func Test_histogram_concurrencyCorrectness(t *testing.T) { - h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, nil, "") + h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, nil, "", nil) var wg sync.WaitGroup end := make(chan struct{}) @@ -351,9 +354,9 @@ func Test_histogram_concurrencyCorrectness(t *testing.T) { wg.Wait() collectionTimeMs := time.Now().UnixMilli() - collectionTimeWithOffsetMs := collectionTimeMs - 1 + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeMs, float64(totalCount.Load())), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-1"}, collectionTimeMs, 
2*float64(totalCount.Load())), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 0), @@ -364,14 +367,14 @@ func Test_histogram_concurrencyCorrectness(t *testing.T) { } func Test_histogram_span_multiplier(t *testing.T) { - h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, nil, "") + h := newHistogram("my_histogram", []float64{1.0, 2.0}, nil, nil, "", nil) h.ObserveWithExemplar(newLabelValueCombo([]string{"label"}, []string{"value-1"}), 1.0, "", 1.5) h.ObserveWithExemplar(newLabelValueCombo([]string{"label"}, []string{"value-1"}), 2.0, "", 5) collectionTimeMs := time.Now().UnixMilli() - collectionTimeWithOffsetMs := collectionTimeMs - 1 + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) expectedSamples := []sample{ - newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), + newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), newSample(map[string]string{"__name__": "my_histogram_count", "label": "value-1"}, collectionTimeMs, 6.5), newSample(map[string]string{"__name__": "my_histogram_sum", "label": "value-1"}, collectionTimeMs, 11.5), newSample(map[string]string{"__name__": "my_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 1.5), diff --git a/modules/generator/registry/native_histogram.go b/modules/generator/registry/native_histogram.go index b19d6e56632..95596f6f315 100644 --- a/modules/generator/registry/native_histogram.go +++ b/modules/generator/registry/native_histogram.go @@ -285,7 +285,8 @@ func (h *nativeHistogram) nativeHistograms(appender storage.Appender, lb *labels func (h *nativeHistogram) classicHistograms(appender storage.Appender, lb *labels.Builder, timeMs int64, s *nativeHistogramSeries) (activeSeries int, err error) { if s.isNew() { lb.Set(labels.MetricName, h.metricName+"_count") - _, err = appender.Append(0, lb.Labels(), timeMs-1, 
0) + endOfLastMinuteMs := getEndOfLastMinuteMs(timeMs) + _, err = appender.Append(0, lb.Labels(), endOfLastMinuteMs, 0) if err != nil { return activeSeries, err } diff --git a/modules/generator/registry/native_histogram_test.go b/modules/generator/registry/native_histogram_test.go index a740a174753..91ad19e34e2 100644 --- a/modules/generator/registry/native_histogram_test.go +++ b/modules/generator/registry/native_histogram_test.go @@ -48,7 +48,7 @@ func Test_Histograms(t *testing.T) { } collectionTimeMs := time.Now().UnixMilli() - collectionTimeWithOffsetMs := collectionTimeMs - 1 + endOfLastMinuteMs := getEndOfLastMinuteMs(collectionTimeMs) cases := []struct { name string @@ -71,7 +71,7 @@ func Test_Histograms(t *testing.T) { }, }, expectedSamples: []sample{ - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 1), @@ -108,13 +108,13 @@ func Test_Histograms(t *testing.T) { }, }, expectedSamples: []sample{ - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-1"}, collectionTimeMs, 1), 
newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "+Inf"}, collectionTimeMs, 1), - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-2"}, collectionTimeMs, 1.5), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), @@ -156,13 +156,13 @@ func Test_Histograms(t *testing.T) { }, }, expectedSamples: []sample{ - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "+Inf"}, collectionTimeMs, 1), - newSample(map[string]string{"__name__": 
"test_histogram_count", "label": "value-2"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-2"}, collectionTimeMs, 1.5), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), @@ -208,7 +208,7 @@ func Test_Histograms(t *testing.T) { newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "+Inf"}, collectionTimeMs, 2), - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-3"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-3"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-3"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-3"}, collectionTimeMs, 3), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-3", "le": "1"}, collectionTimeMs, 0), @@ -250,13 +250,13 @@ func Test_Histograms(t *testing.T) { }, }, expectedSamples: []sample{ - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), // zero count at the 
beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeMs, 20), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-1"}, collectionTimeMs, 20*1.5), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 0), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "2"}, collectionTimeMs, 20), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "+Inf"}, collectionTimeMs, 20), - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, collectionTimeMs, 13), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-2"}, collectionTimeMs, 13*3), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), @@ -299,13 +299,13 @@ func Test_Histograms(t *testing.T) { }, }, expectedSamples: []sample{ - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "1"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", 
"label": "value-1", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-1", "le": "+Inf"}, collectionTimeMs, 1), - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-2"}, collectionTimeMs, 1.5), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), @@ -351,7 +351,7 @@ func Test_Histograms(t *testing.T) { newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "1"}, collectionTimeMs, 0), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "2"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-2", "le": "+Inf"}, collectionTimeMs, 2), - newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-3"}, collectionTimeWithOffsetMs, 0), // zero count at the beginning + newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-3"}, endOfLastMinuteMs, 0), // zero count at the beginning newSample(map[string]string{"__name__": "test_histogram_count", "label": "value-3"}, collectionTimeMs, 1), newSample(map[string]string{"__name__": "test_histogram_sum", "label": "value-3"}, collectionTimeMs, 3), newSample(map[string]string{"__name__": "test_histogram_bucket", "label": "value-3", "le": "1"}, collectionTimeMs, 0), @@ -454,7 +454,7 @@ func Test_Histograms(t *testing.T) { t.Run(tc.name, func(t *testing.T) { t.Run("classic", func(t *testing.T) { onAdd := func(uint32) bool { 
return true } - h := newHistogram("test_histogram", tc.buckets, onAdd, nil, "trace_id") + h := newHistogram("test_histogram", tc.buckets, onAdd, nil, "trace_id", nil) testHistogram(t, h, tc.collections) }) t.Run("native", func(t *testing.T) { diff --git a/modules/generator/registry/registry.go b/modules/generator/registry/registry.go index 92a9cc2f1fa..5b96f586b25 100644 --- a/modules/generator/registry/registry.go +++ b/modules/generator/registry/registry.go @@ -87,6 +87,8 @@ type metric interface { removeStaleSeries(staleTimeMs int64) } +const highestAggregationInterval = 1 * time.Minute + var _ Registry = (*ManagedRegistry)(nil) // New creates a ManagedRegistry. This Registry will scrape itself, write samples into an appender @@ -128,7 +130,7 @@ func New(cfg *Config, overrides Overrides, tenant string, appendable storage.App metricFailedCollections: metricFailedCollections.WithLabelValues(tenant), } - go job(instanceCtx, r.collectMetrics, r.collectionInterval) + go job(instanceCtx, r.CollectMetrics, r.collectionInterval) go job(instanceCtx, r.removeStaleSeries, constantInterval(5*time.Minute)) return r @@ -156,7 +158,7 @@ func (r *ManagedRegistry) NewHistogram(name string, buckets []float64, histogram if hasNativeHistograms(histogramOverride) { h = newNativeHistogram(name, buckets, r.onAddMetricSeries, r.onRemoveMetricSeries, traceIDLabelName, histogramOverride) } else { - h = newHistogram(name, buckets, r.onAddMetricSeries, r.onRemoveMetricSeries, traceIDLabelName) + h = newHistogram(name, buckets, r.onAddMetricSeries, r.onRemoveMetricSeries, traceIDLabelName, r.externalLabels) } r.registerMetric(h) @@ -201,7 +203,7 @@ func (r *ManagedRegistry) onRemoveMetricSeries(count uint32) { r.metricActiveSeries.Sub(float64(count)) } -func (r *ManagedRegistry) collectMetrics(ctx context.Context) { +func (r *ManagedRegistry) CollectMetrics(ctx context.Context) { if r.overrides.MetricsGeneratorDisableCollection(r.tenant) { return } @@ -286,3 +288,7 @@ func 
hasNativeHistograms(s HistogramMode) bool { func hasClassicHistograms(s HistogramMode) bool { return s == HistogramModeClassic || s == HistogramModeBoth } + +func getEndOfLastMinuteMs(timeMs int64) int64 { + return time.UnixMilli(timeMs).Truncate(highestAggregationInterval).Add(-1 * time.Second).UnixMilli() +} diff --git a/modules/generator/registry/registry_test.go b/modules/generator/registry/registry_test.go index f97f9ff3aa1..984323847a7 100644 --- a/modules/generator/registry/registry_test.go +++ b/modules/generator/registry/registry_test.go @@ -46,7 +46,7 @@ func TestManagedRegistry_concurrency(*testing.T) { }) go accessor(func() { - registry.collectMetrics(context.Background()) + registry.CollectMetrics(context.Background()) }) go accessor(func() { @@ -214,7 +214,7 @@ func TestManagedRegistry_disableCollection(t *testing.T) { // active series are still tracked assert.Equal(t, uint32(1), registry.activeSeries.Load()) // but no samples are collected and sent out - registry.collectMetrics(context.Background()) + registry.CollectMetrics(context.Background()) assert.Empty(t, appender.samples) assert.Empty(t, appender.exemplars) } @@ -296,7 +296,7 @@ func TestHistogramOverridesConfig(t *testing.T) { func collectRegistryMetricsAndAssert(t *testing.T, r *ManagedRegistry, appender *capturingAppender, expectedSamples []sample) { collectionTimeMs := time.Now().UnixMilli() - r.collectMetrics(context.Background()) + r.CollectMetrics(context.Background()) // Ignore the collection time on expected samples, since we won't know when the collection will actually take place. 
for i := range expectedSamples { diff --git a/modules/ingester/instance_search.go b/modules/ingester/instance_search.go index 107d9cabaa9..98349bb0356 100644 --- a/modules/ingester/instance_search.go +++ b/modules/ingester/instance_search.go @@ -198,6 +198,7 @@ func (i *instance) SearchTags(ctx context.Context, scope string) (*tempopb.Searc return &tempopb.SearchTagsResponse{ TagNames: distinctValues.Strings(), + Metrics: v2Response.Metrics, // send metrics with response }, nil } @@ -221,6 +222,7 @@ func (i *instance) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsRequ Tags: search.GetVirtualIntrinsicValues(), }, }, + Metrics: &tempopb.MetadataMetrics{InspectedBytes: 0}, // no bytes read for intrinsics }, nil } @@ -232,6 +234,7 @@ func (i *instance) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsRequ limit := i.limiter.limits.MaxBytesPerTagValuesQuery(userID) distinctValues := collector.NewScopedDistinctString(limit) + mc := collector.NewMetricsCollector() engine := traceql.NewEngine() query := traceql.ExtractMatchers(req.Query) @@ -251,7 +254,7 @@ func (i *instance) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsRequ if traceql.IsEmptyQuery(query) { err = s.SearchTags(ctx, attributeScope, func(t string, scope traceql.AttributeScope) { distinctValues.Collect(scope.String(), t) - }, common.DefaultSearchOptions()) + }, mc.Add, common.DefaultSearchOptions()) if err != nil && !errors.Is(err, common.ErrUnsupported) { return fmt.Errorf("unexpected error searching tags: %w", err) } @@ -261,12 +264,11 @@ func (i *instance) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsRequ // otherwise use the filtered search fetcher := traceql.NewTagNamesFetcherWrapper(func(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback) error { - return s.FetchTagNames(ctx, req, cb, common.DefaultSearchOptions()) + return s.FetchTagNames(ctx, req, cb, mc.Add, common.DefaultSearchOptions()) }) return engine.ExecuteTagNames(ctx, 
attributeScope, query, func(tag string, scope traceql.AttributeScope) bool { - distinctValues.Collect(scope.String(), tag) - return distinctValues.Exceeded() + return distinctValues.Collect(scope.String(), tag) }, fetcher) } @@ -300,6 +302,9 @@ func (i *instance) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsRequ collected := distinctValues.Strings() resp := &tempopb.SearchTagsV2Response{ Scopes: make([]*tempopb.SearchTagsV2Scope, 0, len(collected)+1), // +1 for intrinsic below + Metrics: &tempopb.MetadataMetrics{ + InspectedBytes: mc.TotalValue(), // capture metrics + }, } for scope, vals := range collected { resp.Scopes = append(resp.Scopes, &tempopb.SearchTagsV2Scope{ @@ -327,6 +332,7 @@ func (i *instance) SearchTagValues(ctx context.Context, tagName string) (*tempop limit := i.limiter.limits.MaxBytesPerTagValuesQuery(userID) distinctValues := collector.NewDistinctString(limit) + mc := collector.NewMetricsCollector() var inspectedBlocks, maxBlocks int if limit := i.limiter.limits.MaxBlocksPerTagValuesQuery(userID); limit > 0 { @@ -346,7 +352,7 @@ func (i *instance) SearchTagValues(ctx context.Context, tagName string) (*tempop } inspectedBlocks++ - err = s.SearchTagValues(ctx, tagName, dv.Collect, common.DefaultSearchOptions()) + err = s.SearchTagValues(ctx, tagName, dv.Collect, mc.Add, common.DefaultSearchOptions()) if err != nil && !errors.Is(err, common.ErrUnsupported) { return fmt.Errorf("unexpected error searching tag values (%s): %w", tagName, err) } @@ -376,11 +382,12 @@ func (i *instance) SearchTagValues(ctx context.Context, tagName string) (*tempop } if distinctValues.Exceeded() { - level.Warn(log.Logger).Log("msg", "size of tag values in instance exceeded limit, reduce cardinality or size of tags", "tag", tagName, "userID", userID, "limit", limit, "total", distinctValues.TotalDataSize()) + level.Warn(log.Logger).Log("msg", "size of tag values in instance exceeded limit, reduce cardinality or size of tags", "tag", tagName, "userID", userID, 
"limit", limit, "size", distinctValues.Size()) } return &tempopb.SearchTagValuesResponse{ TagValues: distinctValues.Strings(), + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, }, nil } @@ -395,6 +402,7 @@ func (i *instance) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTag limit := i.limiter.limits.MaxBytesPerTagValuesQuery(userID) valueCollector := collector.NewDistinctValue[tempopb.TagValue](limit, func(v tempopb.TagValue) int { return len(v.Type) + len(v.Value) }) + mc := collector.NewMetricsCollector() // to collect bytesRead metric engine := traceql.NewEngine() @@ -428,12 +436,12 @@ func (i *instance) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTag // helper functions as closures, to access local variables performSearch := func(ctx context.Context, s common.Searcher, collector *collector.DistinctValue[tempopb.TagValue]) error { if traceql.IsEmptyQuery(query) { - return s.SearchTagValuesV2(ctx, tag, traceql.MakeCollectTagValueFunc(collector.Collect), common.DefaultSearchOptions()) + return s.SearchTagValuesV2(ctx, tag, traceql.MakeCollectTagValueFunc(collector.Collect), mc.Add, common.DefaultSearchOptions()) } // Otherwise, use the filtered search fetcher := traceql.NewTagValuesFetcherWrapper(func(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback) error { - return s.FetchTagValues(ctx, req, cb, common.DefaultSearchOptions()) + return s.FetchTagValues(ctx, req, cb, mc.Add, common.DefaultSearchOptions()) }) return engine.ExecuteTagValues(ctx, tag, query, traceql.MakeCollectTagValueFunc(collector.Collect), fetcher) @@ -477,7 +485,7 @@ func (i *instance) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTag _ = level.Warn(log.Logger).Log("msg", "GetDiskCache failed", "err", err) } - // we got data...unmarshall, and add values to central collector + // we got data...unmarshall, and add values to central collector and add bytesRead if len(cacheData) > 0 && err == nil { resp 
:= &tempopb.SearchTagValuesV2Response{} err = proto.Unmarshal(cacheData, resp) @@ -485,6 +493,14 @@ func (i *instance) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTag return err } span.SetAttributes(attribute.Bool("cached", true)) + // Instead of the reporting the InspectedBytes of the cached response. + // we report the size of cacheData as the Inspected bytes in case we hit disk cache. + // we do this because, because it's incorrect and misleading to report the metrics of cachedResponse + // we report the size of the cacheData as the amount of data was read to search this block. + // this can skew our metrics because this will be lower than the data read to search the block. + // we can remove this if this becomes an issue but leave it in for now to more accurate. + mc.Add(uint64(len(cacheData))) + for _, v := range resp.TagValues { if valueCollector.Collect(*v) { break // we have reached the limit, so stop @@ -493,26 +509,26 @@ func (i *instance) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTag return nil } + // cache miss, search the block. We will cache the results if we find any. span.SetAttributes(attribute.Bool("cached", false)) - // results not in cache, so search the block - // using a local collector to collect values from the block and set cache + // using local collector to collect values from the block and cache them. 
localCol := collector.NewDistinctValue[tempopb.TagValue](limit, func(v tempopb.TagValue) int { return len(v.Type) + len(v.Value) }) localErr := performSearch(ctx, b, localCol) if localErr != nil { return localErr } - // marshal the local collector and set the cache + // marshal the values local collector and set the cache values := localCol.Values() - valuesProto, err := valuesToTagValuesV2RespProto(values) - if err == nil && len(valuesProto) > 0 { - err2 := b.SetDiskCache(ctx, cacheKey, valuesProto) + v2RespProto, err := valuesToTagValuesV2RespProto(values) + if err == nil && len(v2RespProto) > 0 { + err2 := b.SetDiskCache(ctx, cacheKey, v2RespProto) if err2 != nil { _ = level.Warn(log.Logger).Log("msg", "SetDiskCache failed", "err", err2) } } - // add values to the central collector + // now add values to the central collector to make sure they are included in the response. for _, v := range values { if valueCollector.Collect(v) { break // we have reached the limit, so stop @@ -576,7 +592,9 @@ func (i *instance) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTag _ = level.Warn(log.Logger).Log("msg", "size of tag values exceeded limit, reduce cardinality or size of tags", "tag", req.TagName, "userID", userID, "limit", limit, "size", valueCollector.Size()) } - resp := &tempopb.SearchTagValuesV2Response{} + resp := &tempopb.SearchTagValuesV2Response{ + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, // include metrics in response + } for _, v := range valueCollector.Values() { v2 := v @@ -621,6 +639,7 @@ func searchTagValuesV2CacheKey(req *tempopb.SearchTagValuesRequest, limit int, p // valuesToTagValuesV2RespProto converts TagValues to a protobuf marshalled bytes // this is slightly modified version of valuesToV2Response from querier.go func valuesToTagValuesV2RespProto(tagValues []tempopb.TagValue) ([]byte, error) { + // NOTE: we only cache TagValues and don't Marshal Metrics resp := &tempopb.SearchTagValuesV2Response{} 
resp.TagValues = make([]*tempopb.TagValue, 0, len(tagValues)) diff --git a/modules/ingester/instance_search_test.go b/modules/ingester/instance_search_test.go index 291477428c7..cc91b3b4b27 100644 --- a/modules/ingester/instance_search_test.go +++ b/modules/ingester/instance_search_test.go @@ -275,37 +275,31 @@ func TestInstanceSearchTags(t *testing.T) { // nolint:revive,unparam func testSearchTagsAndValues(t *testing.T, ctx context.Context, i *instance, tagName string, expectedTagValues []string) { - sr, err := i.SearchTags(ctx, "") - require.NoError(t, err) - assert.Contains(t, sr.TagNames, tagName) - - sr, err = i.SearchTags(ctx, "span") - require.NoError(t, err) - assert.Contains(t, sr.TagNames, tagName) - - sr, err = i.SearchTags(ctx, "resource") - require.NoError(t, err) - assert.NotContains( - t, - sr.TagNames, - tagName, - ) // tags are added to h the spans and not resources so they should not be returned - - // added the same span tag to both event and link - sr, err = i.SearchTags(ctx, "event") - require.NoError(t, err) - assert.Contains(t, sr.TagNames, tagName) + checkSearchTags := func(scope string, contains bool) { + sr, err := i.SearchTags(ctx, scope) + require.NoError(t, err) + require.Greater(t, sr.Metrics.InspectedBytes, uint64(100)) // at least 100 bytes are inspected + if contains { + require.Contains(t, sr.TagNames, tagName) + } else { + require.NotContains(t, sr.TagNames, tagName) + } + } - sr, err = i.SearchTags(ctx, "link") - require.NoError(t, err) - assert.Contains(t, sr.TagNames, tagName) + checkSearchTags("", true) + checkSearchTags("span", true) + // tags are added to the spans and not resources so they should not be present on resource + checkSearchTags("resource", false) + checkSearchTags("event", true) + checkSearchTags("link", true) srv, err := i.SearchTagValues(ctx, tagName) require.NoError(t, err) + require.Greater(t, srv.Metrics.InspectedBytes, uint64(100)) // we scanned at-least 100 bytes sort.Strings(expectedTagValues) 
sort.Strings(srv.TagValues) - assert.Equal(t, expectedTagValues, srv.TagValues) + require.Equal(t, expectedTagValues, srv.TagValues) } func TestInstanceSearchTagAndValuesV2(t *testing.T) { @@ -372,74 +366,32 @@ func testSearchTagsAndValuesV2( ) { tagsResp, err := i.SearchTags(ctx, "none") require.NoError(t, err) + require.Greater(t, tagsResp.Metrics.InspectedBytes, uint64(100)) - tagValuesResp, err := i.SearchTagValuesV2(ctx, &tempopb.SearchTagValuesRequest{ - TagName: fmt.Sprintf("span.%s", tagName), - Query: query, - }) - require.NoError(t, err) - - tagValues := make([]string, 0, len(tagValuesResp.TagValues)) - for _, v := range tagValuesResp.TagValues { - tagValues = append(tagValues, v.Value) - } - - sort.Strings(tagValues) - sort.Strings(expectedTagValues) - assert.Contains(t, tagsResp.TagNames, tagName) - assert.Equal(t, expectedTagValues, tagValues) - - // Test with event and link - - tagValuesResp, err = i.SearchTagValuesV2(ctx, &tempopb.SearchTagValuesRequest{ - TagName: fmt.Sprintf("event.%s", tagName), - Query: query, - }) - require.NoError(t, err) - - tagValues = make([]string, 0, len(tagValuesResp.TagValues)) - for _, v := range tagValuesResp.TagValues { - tagValues = append(tagValues, v.Value) - } - - sort.Strings(tagValues) - sort.Strings(expectedEventTagValues) - assert.Contains(t, tagsResp.TagNames, tagName) - assert.Equal(t, expectedEventTagValues, tagValues) - - tagValuesResp, err = i.SearchTagValuesV2(ctx, &tempopb.SearchTagValuesRequest{ - TagName: fmt.Sprintf("link.%s", tagName), - Query: query, - }) - require.NoError(t, err) - - tagValues = make([]string, 0, len(tagValuesResp.TagValues)) - for _, v := range tagValuesResp.TagValues { - tagValues = append(tagValues, v.Value) - } - - sort.Strings(tagValues) - sort.Strings(expectedLinkTagValues) - assert.Contains(t, tagsResp.TagNames, tagName) - assert.Equal(t, expectedLinkTagValues, tagValues) - - // instrumentation scope attr + checkTagValues := func(scope string, expectedValues []string) { + 
tagValuesResp, err := i.SearchTagValuesV2(ctx, &tempopb.SearchTagValuesRequest{ + TagName: fmt.Sprintf("%s.%s", scope, tagName), + Query: query, + }) + require.NoError(t, err) + // we scanned at-least 100 bytes + require.Greater(t, tagValuesResp.Metrics.InspectedBytes, uint64(100)) - tagValuesResp, err = i.SearchTagValuesV2(ctx, &tempopb.SearchTagValuesRequest{ - TagName: fmt.Sprintf("instrumentation.%s", tagName), - Query: query, - }) - require.NoError(t, err) + tagValues := make([]string, 0, len(tagValuesResp.TagValues)) + for _, v := range tagValuesResp.TagValues { + tagValues = append(tagValues, v.Value) + } - tagValues = make([]string, 0, len(tagValuesResp.TagValues)) - for _, v := range tagValuesResp.TagValues { - tagValues = append(tagValues, v.Value) + sort.Strings(tagValues) + sort.Strings(expectedValues) + require.Contains(t, tagsResp.TagNames, tagName) + require.Equal(t, expectedValues, tagValues) } - sort.Strings(tagValues) - sort.Strings(expectedTagValues) - assert.Contains(t, tagsResp.TagNames, tagName) - assert.Equal(t, expectedTagValues, tagValues) + checkTagValues("span", expectedTagValues) + checkTagValues("event", expectedEventTagValues) + checkTagValues("link", expectedLinkTagValues) + checkTagValues("instrumentation", expectedTagValues) } func cacheKeysForTestSearchTagValuesV2(tagKey, query string, limit int) []string { diff --git a/modules/ingester/local_block.go b/modules/ingester/local_block.go index 05bd3ffe6cf..666dba44261 100644 --- a/modules/ingester/local_block.go +++ b/modules/ingester/local_block.go @@ -66,10 +66,22 @@ func (c *LocalBlock) Search(ctx context.Context, req *tempopb.SearchRequest, opt return c.BackendBlock.Search(ctx, req, opts) } -func (c *LocalBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, opts common.SearchOptions) error { +func (c *LocalBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, mcb common.MetricsCallback, opts 
common.SearchOptions) error { + ctx, span := tracer.Start(ctx, "LocalBlock.SearchTags") + defer span.End() + return c.BackendBlock.SearchTags(ctx, scope, cb, mcb, opts) +} + +func (c *LocalBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { + ctx, span := tracer.Start(ctx, "LocalBlock.SearchTagValues") + defer span.End() + return c.BackendBlock.SearchTagValues(ctx, tag, cb, mcb, opts) +} + +func (c *LocalBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, mcb common.MetricsCallback, opts common.SearchOptions) error { ctx, span := tracer.Start(ctx, "LocalBlock.SearchTagValuesV2") defer span.End() - return c.BackendBlock.SearchTagValuesV2(ctx, tag, cb, opts) + return c.BackendBlock.SearchTagValuesV2(ctx, tag, cb, mcb, opts) } func (c *LocalBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, opts common.SearchOptions) (traceql.FetchSpansResponse, error) { @@ -78,16 +90,16 @@ func (c *LocalBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, o return c.BackendBlock.Fetch(ctx, req, opts) } -func (c *LocalBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, opts common.SearchOptions) error { +func (c *LocalBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { ctx, span := tracer.Start(ctx, "LocalBlock.FetchTagValues") defer span.End() - return c.BackendBlock.FetchTagValues(ctx, req, cb, opts) + return c.BackendBlock.FetchTagValues(ctx, req, cb, mcb, opts) } -func (c *LocalBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, opts common.SearchOptions) error { +func (c *LocalBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, mcb 
common.MetricsCallback, opts common.SearchOptions) error { ctx, span := tracer.Start(ctx, "LocalBlock.FetchTagNames") defer span.End() - return c.BackendBlock.FetchTagNames(ctx, req, cb, opts) + return c.BackendBlock.FetchTagNames(ctx, req, cb, mcb, opts) } // FlushedTime returns the time the block was flushed. Will return 0 diff --git a/modules/overrides/config.go b/modules/overrides/config.go index 88c9cbe615a..860449fecdf 100644 --- a/modules/overrides/config.go +++ b/modules/overrides/config.go @@ -180,6 +180,11 @@ type StorageOverrides struct { DedicatedColumns backend.DedicatedColumns `yaml:"parquet_dedicated_columns" json:"parquet_dedicated_columns"` } +type CostAttributionOverrides struct { + MaxCardinality uint64 `yaml:"max_cardinality,omitempty" json:"max_cardinality,omitempty"` + Dimensions map[string]string `yaml:"dimensions,omitempty" json:"dimensions,omitempty"` +} + type Overrides struct { // Ingestion enforced overrides. Ingestion IngestionOverrides `yaml:"ingestion,omitempty" json:"ingestion,omitempty"` @@ -194,7 +199,8 @@ type Overrides struct { // Global enforced overrides. Global GlobalOverrides `yaml:"global,omitempty" json:"global,omitempty"` // Storage enforced overrides. - Storage StorageOverrides `yaml:"storage,omitempty" json:"storage,omitempty"` + Storage StorageOverrides `yaml:"storage,omitempty" json:"storage,omitempty"` + CostAttribution CostAttributionOverrides `yaml:"cost_attribution,omitempty" json:"cost_attribution,omitempty"` } type Config struct { diff --git a/modules/overrides/config_legacy.go b/modules/overrides/config_legacy.go index 65d0190cdef..526011f486d 100644 --- a/modules/overrides/config_legacy.go +++ b/modules/overrides/config_legacy.go @@ -134,6 +134,8 @@ type LegacyOverrides struct { // is not used when doing a trace by id lookup. 
MaxBytesPerTrace int `yaml:"max_bytes_per_trace" json:"max_bytes_per_trace"` + CostAttribution CostAttributionOverrides `yaml:"cost_attribution,omitempty" json:"cost_attribution,omitempty"` + // tempodb limits DedicatedColumns backend.DedicatedColumns `yaml:"parquet_dedicated_columns" json:"parquet_dedicated_columns"` } @@ -209,6 +211,9 @@ func (l *LegacyOverrides) toNewLimits() Overrides { Storage: StorageOverrides{ DedicatedColumns: l.DedicatedColumns, }, + CostAttribution: CostAttributionOverrides{ + Dimensions: l.CostAttribution.Dimensions, + }, } } diff --git a/modules/overrides/interface.go b/modules/overrides/interface.go index 9a07352a815..79797c42076 100644 --- a/modules/overrides/interface.go +++ b/modules/overrides/interface.go @@ -76,6 +76,8 @@ type Interface interface { MaxMetricsDuration(userID string) time.Duration DedicatedColumns(userID string) backend.DedicatedColumns UnsafeQueryHints(userID string) bool + CostAttributionMaxCardinality(userID string) uint64 + CostAttributionDimensions(userID string) map[string]string // Management API WriteStatusRuntimeConfig(w io.Writer, r *http.Request) error diff --git a/modules/overrides/runtime_config_overrides.go b/modules/overrides/runtime_config_overrides.go index dd1f4209559..b4fe43bcb9a 100644 --- a/modules/overrides/runtime_config_overrides.go +++ b/modules/overrides/runtime_config_overrides.go @@ -350,6 +350,14 @@ func (o *runtimeConfigOverridesManager) UnsafeQueryHints(userID string) bool { return o.getOverridesForUser(userID).Read.UnsafeQueryHints } +func (o *runtimeConfigOverridesManager) CostAttributionMaxCardinality(userID string) uint64 { + return o.getOverridesForUser(userID).CostAttribution.MaxCardinality +} + +func (o *runtimeConfigOverridesManager) CostAttributionDimensions(userID string) map[string]string { + return o.getOverridesForUser(userID).CostAttribution.Dimensions +} + // MaxSearchDuration is the duration of the max search duration for this tenant. 
func (o *runtimeConfigOverridesManager) MaxSearchDuration(userID string) time.Duration { return time.Duration(o.getOverridesForUser(userID).Read.MaxSearchDuration) diff --git a/modules/overrides/user_configurable_overrides.go b/modules/overrides/user_configurable_overrides.go index 386dc45b74a..d3e42ac6a2d 100644 --- a/modules/overrides/user_configurable_overrides.go +++ b/modules/overrides/user_configurable_overrides.go @@ -223,6 +223,13 @@ func (o *userConfigurableOverridesManager) Forwarders(userID string) []string { return o.Interface.Forwarders(userID) } +func (o *userConfigurableOverridesManager) CostAttributionDimensions(userID string) map[string]string { + if dims, ok := o.getTenantLimits(userID).GetCostAttribution().GetDimensions(); ok { + return dims + } + return o.Interface.CostAttributionDimensions(userID) +} + func (o *userConfigurableOverridesManager) MetricsGeneratorProcessors(userID string) map[string]struct{} { // We merge settings from both layers meaning if a processor is enabled on any layer it will be always enabled (OR logic) processorsUserConfigurable, _ := o.getTenantLimits(userID).GetMetricsGenerator().GetProcessors() diff --git a/modules/overrides/userconfigurable/api/api_test.go b/modules/overrides/userconfigurable/api/api_test.go index 653ceb4b6bd..f312dfb4989 100644 --- a/modules/overrides/userconfigurable/api/api_test.go +++ b/modules/overrides/userconfigurable/api/api_test.go @@ -62,7 +62,7 @@ func Test_UserConfigOverridesAPI_overridesHandlers(t *testing.T) { name: "GET", handler: overridesAPI.GetHandler, req: prepareRequest(tenant, "GET", nil), - expResp: `{"forwarders":["my-other-forwarder"],"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, + expResp: `{"forwarders":["my-other-forwarder"],"cost_attribution":{},"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, expContentType: api.HeaderAcceptJSON, expStatusCode: 200, }, @@ -149,7 +149,7 @@ func 
Test_UserConfigOverridesAPI_patchOverridesHandlers(t *testing.T) { name: "PATCH - no values stored yet", patch: `{"forwarders":["my-other-forwarder"]}`, current: ``, - expResp: `{"forwarders":["my-other-forwarder"],"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, + expResp: `{"forwarders":["my-other-forwarder"],"cost_attribution":{},"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, expContentType: api.HeaderAcceptJSON, expStatusCode: 200, }, @@ -157,7 +157,7 @@ func Test_UserConfigOverridesAPI_patchOverridesHandlers(t *testing.T) { name: "PATCH - empty overrides are merged", patch: `{"forwarders":["my-other-forwarder"]}`, current: `{}`, - expResp: `{"forwarders":["my-other-forwarder"],"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, + expResp: `{"forwarders":["my-other-forwarder"],"cost_attribution":{},"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, expContentType: api.HeaderAcceptJSON, expStatusCode: 200, }, @@ -165,7 +165,7 @@ func Test_UserConfigOverridesAPI_patchOverridesHandlers(t *testing.T) { name: "PATCH - overwrite", patch: `{"forwarders":["my-other-forwarder"]}`, current: `{"forwarders":["previous-forwarder"]}`, - expResp: `{"forwarders":["my-other-forwarder"],"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, + expResp: `{"forwarders":["my-other-forwarder"],"cost_attribution":{},"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, expContentType: api.HeaderAcceptJSON, expStatusCode: 200, }, @@ -247,7 +247,7 @@ func TestUserConfigOverridesAPI_patchOverridesHandler_noVersionConflict(t *testi overridesAPI.PatchHandler(w, r) data := w.Body.String() - assert.Equal(t, `{"forwarders":["f"],"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, data) + assert.Equal(t, 
`{"forwarders":["f"],"cost_attribution":{},"metrics_generator":{"processor":{"service_graphs":{},"span_metrics":{}}}}`, data) res := w.Result() assert.Equal(t, "2", res.Header.Get(headerEtag)) diff --git a/modules/overrides/userconfigurable/api/limits_test.go b/modules/overrides/userconfigurable/api/limits_test.go index 60d19a082d7..ddba17403d9 100644 --- a/modules/overrides/userconfigurable/api/limits_test.go +++ b/modules/overrides/userconfigurable/api/limits_test.go @@ -63,6 +63,7 @@ func Test_limitsFromOverrides(t *testing.T) { "forwarders": [ "my-forwarder" ], + "cost_attribution": {}, "metrics_generator": { "processors": [ "service-graphs" diff --git a/modules/overrides/userconfigurable/client/limits.go b/modules/overrides/userconfigurable/client/limits.go index 62cb9afef99..1438b2a95c7 100644 --- a/modules/overrides/userconfigurable/client/limits.go +++ b/modules/overrides/userconfigurable/client/limits.go @@ -8,8 +8,8 @@ import ( ) type Limits struct { - Forwarders *[]string `yaml:"forwarders,omitempty" json:"forwarders,omitempty"` - + Forwarders *[]string `yaml:"forwarders,omitempty" json:"forwarders,omitempty"` + CostAttribution CostAttribution `yaml:"cost_attribution,omitempty" json:"cost_attribution,omitempty"` MetricsGenerator LimitsMetricsGenerator `yaml:"metrics_generator,omitempty" json:"metrics_generator,omitempty"` } @@ -27,6 +27,13 @@ func (l *Limits) GetMetricsGenerator() *LimitsMetricsGenerator { return nil } +func (l *Limits) GetCostAttribution() *CostAttribution { + if l != nil { + return &l.CostAttribution + } + return nil +} + type LimitsMetricsGenerator struct { Processors listtomap.ListToMap `yaml:"processors,omitempty" json:"processors,omitempty"` DisableCollection *bool `yaml:"disable_collection,omitempty" json:"disable_collection,omitempty"` @@ -168,3 +175,14 @@ func (l *LimitsMetricsGeneratorProcessorSpanMetrics) GetTargetInfoExcludedDimens } return nil, false } + +type CostAttribution struct { + Dimensions *map[string]string 
`yaml:"dimensions,omitempty" json:"dimensions,omitempty"` +} + +func (l *CostAttribution) GetDimensions() (map[string]string, bool) { + if l != nil && l.Dimensions != nil { + return *l.Dimensions, true + } + return nil, false +} diff --git a/modules/querier/http.go b/modules/querier/http.go index 247d512445f..bd10f03c746 100644 --- a/modules/querier/http.go +++ b/modules/querier/http.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net/http" + "strings" "time" "github.com/golang/protobuf/jsonpb" //nolint:all //deprecated @@ -425,9 +426,10 @@ func handleError(w http.ResponseWriter, err error) { return } - // todo: better understand all errors returned from queriers and categorize more as 4XX - if errors.Is(err, trace.ErrTraceTooLarge) { - http.Error(w, err.Error(), http.StatusBadRequest) + // TODO: better understand all errors returned from queriers and categorize more as 4XX + // NOTE: we receive a GRPC error from the ingesters, and so we need to check the string content of error as well. + if errors.Is(err, trace.ErrTraceTooLarge) || strings.Contains(err.Error(), trace.ErrTraceTooLarge.Error()) { + http.Error(w, err.Error(), http.StatusUnprocessableEntity) return } diff --git a/modules/querier/querier.go b/modules/querier/querier.go index b5602462164..513f1fd0973 100644 --- a/modules/querier/querier.go +++ b/modules/querier/querier.go @@ -59,6 +59,12 @@ var ( }) ) +type ( + forEachFn func(ctx context.Context, client tempopb.QuerierClient) error + forEachGeneratorFn func(ctx context.Context, client tempopb.MetricsGeneratorClient) error + replicationSetFn func(r ring.ReadRing) (ring.ReplicationSet, error) +) + // Querier handlers queries. type Querier struct { services.Service @@ -83,11 +89,6 @@ type Querier struct { subservicesWatcher *services.FailureWatcher } -type responseFromGenerators struct { - addr string - response interface{} -} - // New makes a new Querier. 
func New( cfg Config, @@ -322,11 +323,6 @@ func (q *Querier) FindTraceByID(ctx context.Context, req *tempopb.TraceByIDReque return resp, nil } -type ( - forEachFn func(ctx context.Context, client tempopb.QuerierClient) error - replicationSetFn func(r ring.ReadRing) (ring.ReplicationSet, error) -) - // forIngesterRings runs f, in parallel, for given ingesters func (q *Querier) forIngesterRings(ctx context.Context, userID string, getReplicationSet replicationSetFn, f forEachFn) error { if ctx.Err() != nil { @@ -424,14 +420,10 @@ func forOneIngesterRing(ctx context.Context, replicationSet ring.ReplicationSet, } // forGivenGenerators runs f, in parallel, for given generators -func (q *Querier) forGivenGenerators( - ctx context.Context, - replicationSet ring.ReplicationSet, - f func(ctx context.Context, client tempopb.MetricsGeneratorClient) (interface{}, error), -) ([]responseFromGenerators, error) { +func (q *Querier) forGivenGenerators(ctx context.Context, replicationSet ring.ReplicationSet, f forEachGeneratorFn) error { if ctx.Err() != nil { _ = level.Debug(log.Logger).Log("foreGivenGenerators context error", "ctx.Err()", ctx.Err().Error()) - return nil, ctx.Err() + return ctx.Err() } ctx, span := tracer.Start(ctx, "Querier.forGivenGenerators") @@ -448,25 +440,25 @@ func (q *Querier) forGivenGenerators( return nil, fmt.Errorf("failed to get client for %s: %w", generator.Addr, err) } - resp, err := f(funcCtx, client.(tempopb.MetricsGeneratorClient)) + err = f(funcCtx, client.(tempopb.MetricsGeneratorClient)) if err != nil { return nil, fmt.Errorf("failed to execute f() for %s: %w", generator.Addr, err) } - return responseFromGenerators{generator.Addr, resp}, nil + // we are returning the empty response here because response is collected by + // the collector inside forEachGeneratorFn + return nil, nil } - results, err := replicationSet.Do(ctx, q.cfg.ExtraQueryDelay, doFunc) + // ignore response because it's nil, and we are using a collector inside forEachGeneratorFn 
to + // collect the actual response. we need to return nil here and ignore it + // because doFunc expects us to return a response + _, err := replicationSet.Do(ctx, q.cfg.ExtraQueryDelay, doFunc) if err != nil { - return nil, fmt.Errorf("failed to get response from generators: %w", err) - } - - responses := make([]responseFromGenerators, 0, len(results)) - for _, result := range results { - responses = append(responses, result.(responseFromGenerators)) + return fmt.Errorf("failed to get response from generators: %w", err) } - return responses, nil + return nil } func (q *Querier) SearchRecent(ctx context.Context, req *tempopb.SearchRequest) (*tempopb.SearchResponse, error) { @@ -521,6 +513,7 @@ func (q *Querier) SearchTagsBlocks(ctx context.Context, req *tempopb.SearchTagsB return &tempopb.SearchTagsResponse{ TagNames: distinctValues.Strings(), + Metrics: v2Response.Metrics, }, nil } @@ -544,16 +537,22 @@ func (q *Querier) SearchTags(ctx context.Context, req *tempopb.SearchTagsRequest limit := q.limits.MaxBytesPerTagValuesQuery(userID) distinctValues := collector.NewDistinctString(limit) + mc := collector.NewMetricsCollector() forEach := func(ctx context.Context, client tempopb.QuerierClient) error { resp, err := client.SearchTags(ctx, req) if err != nil { return err } + // collect metrics first because we stop early with return + if resp.Metrics != nil { + mc.Add(resp.Metrics.InspectedBytes) + } + for _, tag := range resp.TagNames { distinctValues.Collect(tag) if distinctValues.Exceeded() { - break // stop early + return nil // stop early } } return nil @@ -564,14 +563,13 @@ func (q *Querier) SearchTags(ctx context.Context, req *tempopb.SearchTagsRequest } if distinctValues.Exceeded() { - level.Warn(log.Logger).Log("msg", "size of tags in instance exceeded limit, reduce cardinality or size of tags", "userID", userID, "limit", limit, "total", distinctValues.TotalDataSize()) + level.Warn(log.Logger).Log("msg", "size of tags in instance exceeded limit, reduce 
cardinality or size of tags", "userID", userID, "limit", limit, "size", distinctValues.Size()) } - resp := &tempopb.SearchTagsResponse{ + return &tempopb.SearchTagsResponse{ TagNames: distinctValues.Strings(), - } - - return resp, nil + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, + }, nil } func (q *Querier) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsRequest) (*tempopb.SearchTagsV2Response, error) { @@ -582,6 +580,7 @@ func (q *Querier) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsReque limit := q.limits.MaxBytesPerTagValuesQuery(userID) distinctValues := collector.NewScopedDistinctString(limit) + mc := collector.NewMetricsCollector() // Get results from all ingesters forEach := func(ctx context.Context, client tempopb.QuerierClient) error { @@ -589,10 +588,14 @@ func (q *Querier) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsReque if err != nil { return err } + // collect metrics first because we stop early with return + if resp.Metrics != nil { + mc.Add(resp.Metrics.InspectedBytes) + } + for _, res := range resp.Scopes { for _, tag := range res.Tags { - distinctValues.Collect(res.Name, tag) - if distinctValues.Exceeded() { + if distinctValues.Collect(res.Name, tag) { return nil } } @@ -611,7 +614,8 @@ func (q *Querier) SearchTagsV2(ctx context.Context, req *tempopb.SearchTagsReque collected := distinctValues.Strings() resp := &tempopb.SearchTagsV2Response{ - Scopes: make([]*tempopb.SearchTagsV2Scope, 0, len(collected)), + Scopes: make([]*tempopb.SearchTagsV2Scope, 0, len(collected)), + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, // send metrics with response } for scope, vals := range collected { resp.Scopes = append(resp.Scopes, &tempopb.SearchTagsV2Scope{ @@ -631,6 +635,7 @@ func (q *Querier) SearchTagValues(ctx context.Context, req *tempopb.SearchTagVal limit := q.limits.MaxBytesPerTagValuesQuery(userID) distinctValues := collector.NewDistinctString(limit) + mc := 
collector.NewMetricsCollector() // Virtual tags values. Get these first. for _, v := range search.GetVirtualTagValues(req.TagName) { @@ -643,13 +648,17 @@ func (q *Querier) SearchTagValues(ctx context.Context, req *tempopb.SearchTagVal if err != nil { return err } + // add metrics first because we stop early with return + if resp.Metrics != nil { + mc.Add(resp.Metrics.InspectedBytes) + } + for _, res := range resp.TagValues { distinctValues.Collect(res) if distinctValues.Exceeded() { return nil } } - return nil } @@ -659,14 +668,13 @@ func (q *Querier) SearchTagValues(ctx context.Context, req *tempopb.SearchTagVal } if distinctValues.Exceeded() { - level.Warn(log.Logger).Log("msg", "size of tag values in instance exceeded limit, reduce cardinality or size of tags", "tag", req.TagName, "userID", userID, "limit", limit, "total", distinctValues.TotalDataSize()) + level.Warn(log.Logger).Log("msg", "size of tag values in instance exceeded limit, reduce cardinality or size of tags", "tag", req.TagName, "userID", userID, "limit", limit, "size", distinctValues.Size()) } - resp := &tempopb.SearchTagValuesResponse{ + return &tempopb.SearchTagValuesResponse{ TagValues: distinctValues.Strings(), - } - - return resp, nil + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, + }, nil } func (q *Querier) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTagValuesRequest) (*tempopb.SearchTagValuesV2Response, error) { @@ -677,6 +685,7 @@ func (q *Querier) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTagV limit := q.limits.MaxBytesPerTagValuesQuery(userID) distinctValues := collector.NewDistinctValue(limit, func(v tempopb.TagValue) int { return len(v.Type) + len(v.Value) }) + mc := collector.NewMetricsCollector() // Virtual tags values. Get these first. 
virtualVals := search.GetVirtualTagValuesV2(req.TagName) @@ -688,18 +697,25 @@ func (q *Querier) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTagV // with v2 search we can confidently bail if GetVirtualTagValuesV2 gives us any hits. this doesn't work // in v1 search b/c intrinsic tags like "status" are conflated with attributes named "status" if virtualVals != nil { - return valuesToV2Response(distinctValues), nil + // no data was read to collect virtual tags so 0 bytesRead + return valuesToV2Response(distinctValues, 0), nil } forEach := func(ctx context.Context, client tempopb.QuerierClient) error { + // combine metrics as we get results from ingesters resp, err := client.SearchTagValuesV2(ctx, req) if err != nil { return err } + // collect metrics first, we stop early with return + if resp.Metrics != nil { + mc.Add(resp.Metrics.InspectedBytes) + } + for _, res := range resp.TagValues { distinctValues.Collect(*res) if distinctValues.Exceeded() { - break // stop early + return nil // stop early } } return nil @@ -713,7 +729,7 @@ func (q *Querier) SearchTagValuesV2(ctx context.Context, req *tempopb.SearchTagV _ = level.Warn(log.Logger).Log("msg", "size of tag values exceeded limit, reduce cardinality or size of tags", "tag", req.TagName, "userID", userID, "limit", limit, "size", distinctValues.Size()) } - return valuesToV2Response(distinctValues), nil + return valuesToV2Response(distinctValues, mc.TotalValue()), nil } func (q *Querier) SpanMetricsSummary( @@ -740,23 +756,26 @@ func (q *Querier) SpanMetricsSummary( if err != nil { return nil, fmt.Errorf("error finding generators in Querier.SpanMetricsSummary: %w", err) } - lookupResults, err := q.forGivenGenerators( - ctx, - replicationSet, - func(ctx context.Context, client tempopb.MetricsGeneratorClient) (interface{}, error) { - return client.GetMetrics(ctx, genReq) - }, - ) + + var results []*tempopb.SpanMetricsResponse + mtx := sync.Mutex{} + + forEach := func(ctx context.Context, client 
tempopb.MetricsGeneratorClient) error { + resp, err := client.GetMetrics(ctx, genReq) + if err != nil { + return err + } + // collect the results from the generators in the pool + mtx.Lock() + defer mtx.Unlock() + results = append(results, resp) + return nil + } + err = q.forGivenGenerators(ctx, replicationSet, forEach) if err != nil { return nil, fmt.Errorf("error querying generators in Querier.SpanMetricsSummary: %w", err) } - // Assemble the results from the generators in the pool - results := make([]*tempopb.SpanMetricsResponse, 0, len(lookupResults)) - for _, result := range lookupResults { - results = append(results, result.response.(*tempopb.SpanMetricsResponse)) - } - // Combine the results yyy := make(map[traceqlmetrics.MetricKeys]*traceqlmetrics.LatencyHistogram) xxx := make(map[traceqlmetrics.MetricKeys]*tempopb.SpanMetricsSummary) @@ -807,13 +826,14 @@ func (q *Querier) SpanMetricsSummary( return resp, nil } -func valuesToV2Response(distinctValues *collector.DistinctValue[tempopb.TagValue]) *tempopb.SearchTagValuesV2Response { - resp := &tempopb.SearchTagValuesV2Response{} +func valuesToV2Response(distinctValues *collector.DistinctValue[tempopb.TagValue], bytesRead uint64) *tempopb.SearchTagValuesV2Response { + resp := &tempopb.SearchTagValuesV2Response{ + Metrics: &tempopb.MetadataMetrics{InspectedBytes: bytesRead}, + } for _, v := range distinctValues.Values() { v2 := v resp.TagValues = append(resp.TagValues, &v2) } - return resp } @@ -901,6 +921,8 @@ func (q *Querier) internalTagsSearchBlockV2(ctx context.Context, req *tempopb.Se Tags: search.GetVirtualIntrinsicValues(), }, }, + // no bytes were scanned to return the intrinsic values + Metrics: &tempopb.MetadataMetrics{InspectedBytes: 0}, }, nil } @@ -959,8 +981,11 @@ func (q *Querier) internalTagsSearchBlockV2(ctx context.Context, req *tempopb.Se return resp, nil } + valueCollector := collector.NewScopedDistinctString(q.limits.MaxBytesPerTagValuesQuery(tenantID)) + mc := 
collector.NewMetricsCollector() + fetcher := traceql.NewTagNamesFetcherWrapper(func(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback) error { - return q.store.FetchTagNames(ctx, meta, req, cb, common.DefaultSearchOptions()) + return q.store.FetchTagNames(ctx, meta, req, cb, mc.Add, common.DefaultSearchOptions()) }) scope := traceql.AttributeScopeFromString(req.SearchReq.Scope) @@ -968,10 +993,8 @@ func (q *Querier) internalTagsSearchBlockV2(ctx context.Context, req *tempopb.Se return nil, fmt.Errorf("unknown scope: %s", req.SearchReq.Scope) } - valueCollector := collector.NewScopedDistinctString(q.limits.MaxBytesPerTagValuesQuery(tenantID)) err = q.engine.ExecuteTagNames(ctx, scope, query, func(tag string, scope traceql.AttributeScope) bool { - valueCollector.Collect(scope.String(), tag) - return valueCollector.Exceeded() + return valueCollector.Collect(scope.String(), tag) }, fetcher) if err != nil { return nil, err @@ -979,7 +1002,8 @@ func (q *Querier) internalTagsSearchBlockV2(ctx context.Context, req *tempopb.Se scopedVals := valueCollector.Strings() resp := &tempopb.SearchTagsV2Response{ - Scopes: make([]*tempopb.SearchTagsV2Scope, 0, len(scopedVals)), + Scopes: make([]*tempopb.SearchTagsV2Scope, 0, len(scopedVals)), + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, // send metrics with response } for scope, vals := range scopedVals { resp.Scopes = append(resp.Scopes, &tempopb.SearchTagsV2Scope{ @@ -1029,14 +1053,12 @@ func (q *Querier) internalTagValuesSearchBlock(ctx context.Context, req *tempopb opts.StartPage = int(req.StartPage) opts.TotalPages = int(req.PagesToSearch) - values, err := q.store.SearchTagValues(ctx, meta, req.SearchReq.TagName, opts) + resp, err := q.store.SearchTagValues(ctx, meta, req.SearchReq.TagName, opts) if err != nil { return &tempopb.SearchTagValuesResponse{}, err } - return &tempopb.SearchTagValuesResponse{ - TagValues: values, - }, nil + return resp, nil } func (q *Querier) 
internalTagValuesSearchBlockV2(ctx context.Context, req *tempopb.SearchTagValuesBlockRequest) (*tempopb.SearchTagValuesV2Response, error) { @@ -1087,17 +1109,19 @@ func (q *Querier) internalTagValuesSearchBlockV2(ctx context.Context, req *tempo return nil, err } + valueCollector := collector.NewDistinctValue(q.limits.MaxBytesPerTagValuesQuery(tenantID), func(v tempopb.TagValue) int { return len(v.Type) + len(v.Value) }) + mc := collector.NewMetricsCollector() + fetcher := traceql.NewTagValuesFetcherWrapper(func(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback) error { - return q.store.FetchTagValues(ctx, meta, req, cb, opts) + return q.store.FetchTagValues(ctx, meta, req, cb, mc.Add, opts) }) - valueCollector := collector.NewDistinctValue(q.limits.MaxBytesPerTagValuesQuery(tenantID), func(v tempopb.TagValue) int { return len(v.Type) + len(v.Value) }) err = q.engine.ExecuteTagValues(ctx, tag, query, traceql.MakeCollectTagValueFunc(valueCollector.Collect), fetcher) if err != nil { return nil, err } - return valuesToV2Response(valueCollector), nil + return valuesToV2Response(valueCollector, mc.TotalValue()), nil } func (q *Querier) postProcessIngesterSearchResults(req *tempopb.SearchRequest, results []*tempopb.SearchResponse) *tempopb.SearchResponse { diff --git a/modules/querier/querier_query_range.go b/modules/querier/querier_query_range.go index 4239ed525ef..bee13a3a155 100644 --- a/modules/querier/querier_query_range.go +++ b/modules/querier/querier_query_range.go @@ -3,6 +3,7 @@ package querier import ( "context" "fmt" + "sync" "time" "github.com/go-kit/log/level" @@ -30,26 +31,27 @@ func (q *Querier) queryRangeRecent(ctx context.Context, req *tempopb.QueryRangeR if err != nil { return nil, fmt.Errorf("error finding generators in Querier.queryRangeRecent: %w", err) } - lookupResults, err := q.forGivenGenerators( - ctx, - replicationSet, - func(ctx context.Context, client tempopb.MetricsGeneratorClient) (interface{}, error) 
{ - return client.QueryRange(ctx, req) - }, - ) - if err != nil { - _ = level.Error(log.Logger).Log("error querying generators in Querier.queryRangeRecent", "err", err) - - return nil, fmt.Errorf("error querying generators in Querier.queryRangeRecent: %w", err) - } c, err := traceql.QueryRangeCombinerFor(req, traceql.AggregateModeSum, false) if err != nil { return nil, err } - for _, result := range lookupResults { - c.Combine(result.response.(*tempopb.QueryRangeResponse)) + mtx := sync.Mutex{} // combiner doesn't lock, so take lock before calling Combine to make is safe + forEach := func(ctx context.Context, client tempopb.MetricsGeneratorClient) error { + resp, err := client.QueryRange(ctx, req) + if err != nil { + return err + } + mtx.Lock() + defer mtx.Unlock() + c.Combine(resp) + return nil + } + err = q.forGivenGenerators(ctx, replicationSet, forEach) + if err != nil { + _ = level.Error(log.Logger).Log("error querying generators in Querier.queryRangeRecent", "err", err) + return nil, fmt.Errorf("error querying generators in Querier.queryRangeRecent: %w", err) } return c.Response(), nil diff --git a/modules/querier/querier_test.go b/modules/querier/querier_test.go index 55a0ea808af..2548be01a49 100644 --- a/modules/querier/querier_test.go +++ b/modules/querier/querier_test.go @@ -111,14 +111,14 @@ func TestVirtualTagsDoesntHitBackend(t *testing.T) { TagName: "duration", }) require.NoError(t, err) - require.Equal(t, &tempopb.SearchTagValuesV2Response{}, resp) + require.Equal(t, &tempopb.SearchTagValuesV2Response{Metrics: &tempopb.MetadataMetrics{}}, resp) // traceDuration should return nothing resp, err = q.SearchTagValuesV2(ctx, &tempopb.SearchTagValuesRequest{ TagName: "traceDuration", }) require.NoError(t, err) - require.Equal(t, &tempopb.SearchTagValuesV2Response{}, resp) + require.Equal(t, &tempopb.SearchTagValuesV2Response{Metrics: &tempopb.MetadataMetrics{}}, resp) // status should return a static list resp, err = q.SearchTagValuesV2(ctx, 
&tempopb.SearchTagValuesRequest{ @@ -141,6 +141,7 @@ func TestVirtualTagsDoesntHitBackend(t *testing.T) { Value: "unset", }, }, + Metrics: &tempopb.MetadataMetrics{}, }, resp) // kind should return a static list @@ -176,11 +177,13 @@ func TestVirtualTagsDoesntHitBackend(t *testing.T) { Value: "unspecified", }, }, + Metrics: &tempopb.MetadataMetrics{}, }, resp) - // this should error b/c it will attempt to hit the unconfigured backend - _, err = q.SearchTagValuesV2(ctx, &tempopb.SearchTagValuesRequest{ + // this should error b/c it will attempt to hit the un-configured backend + resp, err = q.SearchTagValuesV2(ctx, &tempopb.SearchTagValuesRequest{ TagName: ".foo", }) require.Error(t, err) + require.Nil(t, resp) } diff --git a/pkg/collector/distinct_string_collector.go b/pkg/collector/distinct_string_collector.go index a1b4a6c95c0..ca881703e26 100644 --- a/pkg/collector/distinct_string_collector.go +++ b/pkg/collector/distinct_string_collector.go @@ -7,12 +7,13 @@ import ( ) type DistinctString struct { - values map[string]struct{} - new map[string]struct{} - maxLen int - currLen int - totalLen int - mtx sync.Mutex + values map[string]struct{} + new map[string]struct{} + maxLen int + currLen int + diffEnabled bool + limExceeded bool + mtx sync.Mutex } // NewDistinctString with the given maximum data size. This is calculated @@ -20,38 +21,54 @@ type DistinctString struct { // is interpreted as unlimited. func NewDistinctString(maxDataSize int) *DistinctString { return &DistinctString{ - values: make(map[string]struct{}), - new: make(map[string]struct{}), - maxLen: maxDataSize, + values: make(map[string]struct{}), + maxLen: maxDataSize, + diffEnabled: false, // disable diff to make it faster } } -// Collect adds a new value to the distinct string collector. -// return indicates if the value was added or not. -func (d *DistinctString) Collect(s string) bool { +// NewDistinctStringWithDiff is like NewDistinctString but with diff support enabled. 
+func NewDistinctStringWithDiff(maxDataSize int) *DistinctString { + return &DistinctString{ + values: make(map[string]struct{}), + new: make(map[string]struct{}), + maxLen: maxDataSize, + diffEnabled: true, + } +} + +// Collect adds a new value to the distinct string collector +// and returns a boolean indicating whether the value was successfully added or not. +// To check if the limit has been reached, you must call the Exceeded method separately. +func (d *DistinctString) Collect(s string) (added bool) { d.mtx.Lock() defer d.mtx.Unlock() + if d.limExceeded { + return false + } + if _, ok := d.values[s]; ok { // Already present return false } - // New entry - d.totalLen += len(s) - + valueLen := len(s) // Can it fit? - if d.maxLen > 0 && d.currLen+len(s) > d.maxLen { - // No + if d.maxLen > 0 && d.currLen+valueLen > d.maxLen { + // No, it can't fit + d.limExceeded = true return false } // Clone instead of referencing original s = strings.Clone(s) - d.new[s] = struct{}{} + if d.diffEnabled { + d.new[s] = struct{}{} + } d.values[s] = struct{}{} - d.currLen += len(s) + d.currLen += valueLen return true } @@ -76,19 +93,24 @@ func (d *DistinctString) Exceeded() bool { d.mtx.Lock() defer d.mtx.Unlock() - return d.totalLen > d.currLen + return d.limExceeded } -// TotalDataSize is the total size of all distinct strings encountered. -func (d *DistinctString) TotalDataSize() int { +// Size is the total size of all distinct strings encountered. 
+func (d *DistinctString) Size() int { d.mtx.Lock() defer d.mtx.Unlock() - return d.totalLen + return d.currLen } // Diff returns all new strings collected since the last time diff was called -func (d *DistinctString) Diff() []string { +func (d *DistinctString) Diff() ([]string, error) { + // can check diffEnabled without lock because it is not modified after creation + if !d.diffEnabled { + return nil, errDiffNotEnabled + } + d.mtx.Lock() defer d.mtx.Unlock() @@ -100,5 +122,5 @@ func (d *DistinctString) Diff() []string { clear(d.new) sort.Strings(ss) - return ss + return ss, nil } diff --git a/pkg/collector/distinct_string_collector_test.go b/pkg/collector/distinct_string_collector_test.go index 06d0b1277da..ec778e3012c 100644 --- a/pkg/collector/distinct_string_collector_test.go +++ b/pkg/collector/distinct_string_collector_test.go @@ -2,6 +2,7 @@ package collector import ( "fmt" + "strconv" "sync" "testing" @@ -17,23 +18,34 @@ func TestDistinctStringCollector(t *testing.T) { d.Collect("11") require.True(t, d.Exceeded()) - require.Equal(t, []string{"123", "4567", "890"}, d.Strings()) + stringsSlicesEqual(t, []string{"123", "4567", "890"}, d.Strings()) + + // diff fails when diff is not enabled + res, err := d.Diff() + require.Nil(t, res) + require.Error(t, err, errDiffNotEnabled) } func TestDistinctStringCollectorDiff(t *testing.T) { - d := NewDistinctString(0) + d := NewDistinctStringWithDiff(0) d.Collect("123") d.Collect("4567") - require.Equal(t, []string{"123", "4567"}, d.Diff()) - require.Equal(t, []string{}, d.Diff()) + stringsSlicesEqual(t, []string{"123", "4567"}, readDistinctStringDiff(t, d)) + stringsSlicesEqual(t, []string{}, readDistinctStringDiff(t, d)) d.Collect("123") d.Collect("890") - require.Equal(t, []string{"890"}, d.Diff()) - require.Equal(t, []string{}, d.Diff()) + stringsSlicesEqual(t, []string{"890"}, readDistinctStringDiff(t, d)) + stringsSlicesEqual(t, []string{}, readDistinctStringDiff(t, d)) +} + +func readDistinctStringDiff(t 
*testing.T, d *DistinctString) []string { + res, err := d.Diff() + require.NoError(t, err) + return res } func TestDistinctStringCollectorIsSafe(t *testing.T) { @@ -53,3 +65,54 @@ func TestDistinctStringCollectorIsSafe(t *testing.T) { require.Equal(t, len(d.Strings()), 10*100) require.False(t, d.Exceeded()) } + +func BenchmarkDistinctStringCollect(b *testing.B) { + // simulate 100 ingesters, each returning 10_000 tag values + numIngesters := 100 + numTagValuesPerIngester := 10_000 + ingesterStrings := make([][]string, numIngesters) + for i := 0; i < numIngesters; i++ { + strings := make([]string, numTagValuesPerIngester) + for j := 0; j < numTagValuesPerIngester; j++ { + strings[j] = fmt.Sprintf("string_%d_%d", i, j) + } + ingesterStrings[i] = strings + } + + limits := []int{ + 0, // no limit + 100_000, // 100KB + 1_000_000, // 1MB + 10_000_000, // 10MB + } + + b.ResetTimer() // to exclude the setup time for generating tag values + for _, lim := range limits { + b.Run("uniques_limit:"+strconv.Itoa(lim), func(b *testing.B) { + for n := 0; n < b.N; n++ { + distinctStrings := NewDistinctString(lim) + for _, values := range ingesterStrings { + for _, v := range values { + if distinctStrings.Collect(v) { + break // stop early if limit is reached + } + } + } + } + }) + + b.Run("duplicates_limit:"+strconv.Itoa(lim), func(b *testing.B) { + for n := 0; n < b.N; n++ { + distinctStrings := NewDistinctString(lim) + for i := 0; i < numIngesters; i++ { + for j := 0; j < numTagValuesPerIngester; j++ { + // collect first item to simulate duplicates + if distinctStrings.Collect(ingesterStrings[i][0]) { + break // stop early if limit is reached + } + } + } + } + }) + } +} diff --git a/pkg/collector/distinct_value_collector.go b/pkg/collector/distinct_value_collector.go index 930241ef796..a8359667639 100644 --- a/pkg/collector/distinct_value_collector.go +++ b/pkg/collector/distinct_value_collector.go @@ -1,9 +1,12 @@ package collector import ( + "errors" "sync" ) +var 
errDiffNotEnabled = errors.New("diff not enabled") + type DistinctValue[T comparable] struct { values map[T]struct{} new map[T]struct{} @@ -22,7 +25,6 @@ type DistinctValue[T comparable] struct { func NewDistinctValue[T comparable](maxDataSize int, len func(T) int) *DistinctValue[T] { return &DistinctValue[T]{ values: make(map[T]struct{}), - new: make(map[T]struct{}), maxLen: maxDataSize, diffEnabled: false, // disable diff to make it faster len: len, @@ -108,19 +110,20 @@ func (d *DistinctValue[T]) Size() int { // Diff returns all new strings collected since the last time diff was called // returns nil if diff is not enabled -func (d *DistinctValue[T]) Diff() []T { - d.mtx.Lock() - defer d.mtx.Unlock() - +func (d *DistinctValue[T]) Diff() ([]T, error) { + // can check diffEnabled without lock because it is not modified after creation if !d.diffEnabled { - return nil + return nil, errDiffNotEnabled } + d.mtx.Lock() + defer d.mtx.Unlock() + ss := make([]T, 0, len(d.new)) for k := range d.new { ss = append(ss, k) } clear(d.new) - return ss + return ss, nil } diff --git a/pkg/collector/distinct_value_collector_test.go b/pkg/collector/distinct_value_collector_test.go index b17b1c3e680..d0bd470db97 100644 --- a/pkg/collector/distinct_value_collector_test.go +++ b/pkg/collector/distinct_value_collector_test.go @@ -10,20 +10,47 @@ import ( "github.com/stretchr/testify/require" ) +func TestDistinctValueCollector(t *testing.T) { + d := NewDistinctValue[string](10, func(s string) int { return len(s) }) + + var stop bool + stop = d.Collect("123") + require.False(t, stop) + stop = d.Collect("4567") + require.False(t, stop) + stop = d.Collect("890") + require.True(t, stop) + + require.True(t, d.Exceeded()) + require.Equal(t, stop, d.Exceeded()) // final stop should be same as Exceeded + stringsSlicesEqual(t, []string{"123", "4567"}, d.Values()) + + // diff fails when diff is not enabled + res, err := d.Diff() + require.Nil(t, res) + require.Error(t, err, errDiffNotEnabled) +} + 
func TestDistinctValueCollectorDiff(t *testing.T) { d := NewDistinctValueWithDiff[string](0, func(s string) int { return len(s) }) d.Collect("123") d.Collect("4567") - stringsSlicesEqual(t, []string{"123", "4567"}, d.Diff()) - stringsSlicesEqual(t, []string{}, d.Diff()) + stringsSlicesEqual(t, []string{"123", "4567"}, readDistinctValueDiff(t, d)) + stringsSlicesEqual(t, []string{}, readDistinctValueDiff(t, d)) d.Collect("123") d.Collect("890") - stringsSlicesEqual(t, []string{"890"}, d.Diff()) - stringsSlicesEqual(t, []string{}, d.Diff()) + stringsSlicesEqual(t, []string{"890"}, readDistinctValueDiff(t, d)) + stringsSlicesEqual(t, []string{}, readDistinctValueDiff(t, d)) +} + +func readDistinctValueDiff(t *testing.T, d *DistinctValue[string]) []string { + res, err := d.Diff() + require.NoError(t, err) + return res } func stringsSlicesEqual(t *testing.T, a, b []string) { @@ -32,7 +59,7 @@ func stringsSlicesEqual(t *testing.T, a, b []string) { require.Equal(t, a, b) } -func BenchmarkCollect(b *testing.B) { +func BenchmarkDistinctValueCollect(b *testing.B) { // simulate 100 ingesters, each returning 10_000 tag values numIngesters := 100 numTagValuesPerIngester := 10_000 @@ -45,7 +72,6 @@ func BenchmarkCollect(b *testing.B) { Value: fmt.Sprintf("value_%d_%d", i, j), } } - ingesterTagValues[i] = tagValues } limits := []int{ @@ -57,9 +83,8 @@ func BenchmarkCollect(b *testing.B) { b.ResetTimer() // to exclude the setup time for generating tag values for _, lim := range limits { - b.Run("limit:"+strconv.Itoa(lim), func(b *testing.B) { + b.Run("uniques_limit:"+strconv.Itoa(lim), func(b *testing.B) { for n := 0; n < b.N; n++ { - // NewDistinctValue is collecting tag values without diff support distinctValues := NewDistinctValue(lim, func(v tempopb.TagValue) int { return len(v.Type) + len(v.Value) }) for _, tagValues := range ingesterTagValues { for _, v := range tagValues { @@ -70,5 +95,19 @@ func BenchmarkCollect(b *testing.B) { } } }) + + 
b.Run("duplicates_limit:"+strconv.Itoa(lim), func(b *testing.B) { + for n := 0; n < b.N; n++ { + distinctValues := NewDistinctValue(lim, func(v tempopb.TagValue) int { return len(v.Type) + len(v.Value) }) + for i := 0; i < numIngesters; i++ { + for j := 0; j < numTagValuesPerIngester; j++ { + // collect first item to simulate duplicates + if distinctValues.Collect(ingesterTagValues[i][0]) { + break // stop early if limit is reached + } + } + } + } + }) } } diff --git a/pkg/collector/metrics_collector.go b/pkg/collector/metrics_collector.go new file mode 100644 index 00000000000..8a00651b5b4 --- /dev/null +++ b/pkg/collector/metrics_collector.go @@ -0,0 +1,28 @@ +package collector + +import ( + "go.uber.org/atomic" +) + +// MetricsCollector is a simple collector that can be used to accumulate a metric +// we primarily use it to collect the total bytes read from a reader across a request +type MetricsCollector struct { + totalValue *atomic.Uint64 +} + +func NewMetricsCollector() *MetricsCollector { + return &MetricsCollector{ + totalValue: atomic.NewUint64(0), + } +} + +// Add adds new bytes read to TotalValue. 
this method is thread safe and +// satisfies the common.MetricsCallback type so it's used as callback at a lot of places +func (mc *MetricsCollector) Add(value uint64) { + mc.totalValue.Add(value) +} + +// TotalValue returns the sum of total values collected by the collector +func (mc *MetricsCollector) TotalValue() uint64 { + return mc.totalValue.Load() +} diff --git a/pkg/collector/scoped_distinct_string.go b/pkg/collector/scoped_distinct_string.go index aa948607c3a..abe902122c7 100644 --- a/pkg/collector/scoped_distinct_string.go +++ b/pkg/collector/scoped_distinct_string.go @@ -1,44 +1,68 @@ package collector -import "sync" +import ( + "sync" +) type ScopedDistinctString struct { - cols map[string]*DistinctString - maxLen int - curLen int - exceeded bool - mtx sync.Mutex + cols map[string]*DistinctString + newCol func(int) *DistinctString + maxLen int + curLen int + limExceeded bool + diffEnabled bool + mtx sync.Mutex } -func NewScopedDistinctString(sz int) *ScopedDistinctString { +func NewScopedDistinctString(maxDataSize int) *ScopedDistinctString { return &ScopedDistinctString{ - cols: map[string]*DistinctString{}, - maxLen: sz, + cols: map[string]*DistinctString{}, + newCol: NewDistinctString, + maxLen: maxDataSize, + diffEnabled: false, } } -func (d *ScopedDistinctString) Collect(scope string, val string) { +func NewScopedDistinctStringWithDiff(maxDataSize int) *ScopedDistinctString { + return &ScopedDistinctString{ + cols: map[string]*DistinctString{}, + newCol: NewDistinctStringWithDiff, + maxLen: maxDataSize, + diffEnabled: true, + } +} + +// Collect adds a new value to the distinct string collector. +// returns true when it reaches the limits and can't fit more values. +// can be used to stop early during Collect without calling Exceeded. +func (d *ScopedDistinctString) Collect(scope string, val string) (exceeded bool) { d.mtx.Lock() defer d.mtx.Unlock() + if d.limExceeded { + return true + } + + valueLen := len(val) // can it fit? 
- if d.maxLen > 0 && d.curLen+len(val) > d.maxLen { - d.exceeded = true + if d.maxLen > 0 && d.curLen+valueLen > d.maxLen { // No - return + d.limExceeded = true + return true } // get or create collector col, ok := d.cols[scope] if !ok { - col = NewDistinctString(0) + col = d.newCol(0) d.cols[scope] = col } - added := col.Collect(val) - if added { - d.curLen += len(val) + // add valueLen if we successfully added the value + if col.Collect(val) { + d.curLen += valueLen } + return false } // Strings returns the final list of distinct values collected and sorted. @@ -60,22 +84,30 @@ func (d *ScopedDistinctString) Exceeded() bool { d.mtx.Lock() defer d.mtx.Unlock() - return d.exceeded + return d.limExceeded } -// Diff returns all new strings collected since the last time diff was called -func (d *ScopedDistinctString) Diff() map[string][]string { +// Diff returns all new strings collected since the last time Diff was called +func (d *ScopedDistinctString) Diff() (map[string][]string, error) { + if !d.diffEnabled { + return nil, errDiffNotEnabled + } + d.mtx.Lock() defer d.mtx.Unlock() ss := map[string][]string{} for k, v := range d.cols { - diff := v.Diff() + diff, err := v.Diff() + if err != nil { + return nil, err + } + if len(diff) > 0 { ss[k] = diff } } - return ss + return ss, nil } diff --git a/pkg/collector/scoped_distinct_string_test.go b/pkg/collector/scoped_distinct_string_test.go index 2e28191db6e..9b090cef442 100644 --- a/pkg/collector/scoped_distinct_string_test.go +++ b/pkg/collector/scoped_distinct_string_test.go @@ -3,6 +3,7 @@ package collector import ( "fmt" "slices" + "strconv" "sync" "testing" @@ -60,14 +61,18 @@ func TestScopedDistinct(t *testing.T) { } slices.Sort(keys) + var stop bool for _, k := range keys { v := tc.in[k] for _, val := range v { - c.Collect(k, val) + stop = c.Collect(k, val) } } + // check if we exceeded the limit, and Collect and Exceeded return the same value require.Equal(t, tc.exceeded, c.Exceeded()) + require.Equal(t, 
tc.exceeded, stop) + require.Equal(t, stop, c.Exceeded()) actual := c.Strings() assertMaps(t, tc.expected, actual) @@ -75,35 +80,35 @@ func TestScopedDistinct(t *testing.T) { } func TestScopedDistinctDiff(t *testing.T) { - c := NewScopedDistinctString(0) + c := NewScopedDistinctStringWithDiff(0) c.Collect("scope1", "val1") expected := map[string][]string{ "scope1": {"val1"}, } - assertMaps(t, expected, c.Diff()) + assertMaps(t, expected, readScopedDistinctStringDiff(t, c)) // no diff c.Collect("scope1", "val1") expected = map[string][]string{} - assertMaps(t, expected, c.Diff()) - assertMaps(t, map[string][]string{}, c.Diff()) + assertMaps(t, expected, readScopedDistinctStringDiff(t, c)) + assertMaps(t, map[string][]string{}, readScopedDistinctStringDiff(t, c)) // new value c.Collect("scope1", "val2") expected = map[string][]string{ "scope1": {"val2"}, } - assertMaps(t, expected, c.Diff()) - assertMaps(t, map[string][]string{}, c.Diff()) + assertMaps(t, expected, readScopedDistinctStringDiff(t, c)) + assertMaps(t, map[string][]string{}, readScopedDistinctStringDiff(t, c)) // new scope c.Collect("scope2", "val1") expected = map[string][]string{ "scope2": {"val1"}, } - assertMaps(t, expected, c.Diff()) - assertMaps(t, map[string][]string{}, c.Diff()) + assertMaps(t, expected, readScopedDistinctStringDiff(t, c)) + assertMaps(t, map[string][]string{}, readScopedDistinctStringDiff(t, c)) // all c.Collect("scope2", "val1") @@ -113,8 +118,21 @@ func TestScopedDistinctDiff(t *testing.T) { "scope1": {"val3"}, "scope2": {"val2"}, } - assertMaps(t, expected, c.Diff()) - assertMaps(t, map[string][]string{}, c.Diff()) + assertMaps(t, expected, readScopedDistinctStringDiff(t, c)) + assertMaps(t, map[string][]string{}, readScopedDistinctStringDiff(t, c)) + + // diff should error when diff is not enabled + col := NewScopedDistinctString(0) + col.Collect("scope1", "val1") + res, err := col.Diff() + require.Nil(t, res) + require.Error(t, err, errDiffNotEnabled) +} + +func 
readScopedDistinctStringDiff(t *testing.T, d *ScopedDistinctString) map[string][]string { + res, err := d.Diff() + require.NoError(t, err) + return res } func assertMaps(t *testing.T, expected, actual map[string][]string) { @@ -148,3 +166,60 @@ func TestScopedDistinctStringCollectorIsSafe(t *testing.T) { require.Equal(t, totalStrings, 10*100) require.False(t, d.Exceeded()) } + +func BenchmarkScopedDistinctStringCollect(b *testing.B) { + // simulate 100 ingesters, each returning 10_000 tags with various scopes + numIngesters := 100 + numTagsPerIngester := 10_000 + ingesterTags := make([]map[string][]string, numIngesters) + scopeTypes := []string{"resource", "span", "event", "instrumentation"} + + for i := 0; i < numIngesters; i++ { + tags := make(map[string][]string) + for j := 0; j < numTagsPerIngester; j++ { + scope := scopeTypes[j%len(scopeTypes)] + value := fmt.Sprintf("tag_%d_%d", i, j) + tags[scope] = append(tags[scope], value) + } + ingesterTags[i] = tags + } + + limits := []int{ + 0, // no limit + 100_000, // 100KB + 1_000_000, // 1MB + 10_000_000, // 10MB + } + + b.ResetTimer() // to exclude the setup time for generating tags + for _, lim := range limits { + b.Run("uniques_limit:"+strconv.Itoa(lim), func(b *testing.B) { + for n := 0; n < b.N; n++ { + scopedDistinctStrings := NewScopedDistinctString(lim) + for _, tags := range ingesterTags { + for scope, values := range tags { + for _, v := range values { + if scopedDistinctStrings.Collect(scope, v) { + break // stop early if limit is reached + } + } + } + } + } + }) + + b.Run("duplicates_limit:"+strconv.Itoa(lim), func(b *testing.B) { + for n := 0; n < b.N; n++ { + scopedDistinctStrings := NewScopedDistinctString(lim) + for i := 0; i < numIngesters; i++ { + for scope := range ingesterTags[i] { + // collect first item to simulate duplicates + if scopedDistinctStrings.Collect(scope, ingesterTags[i][scope][0]) { + break // stop early if limit is reached + } + } + } + } + }) + } +} diff --git 
a/pkg/httpclient/client.go b/pkg/httpclient/client.go index 9a29eabcc44..34bf40e81f1 100644 --- a/pkg/httpclient/client.go +++ b/pkg/httpclient/client.go @@ -50,6 +50,7 @@ type TempoHTTPClient interface { QueryTraceWithRange(id string, start int64, end int64) (*tempopb.Trace, error) SearchTraceQL(query string) (*tempopb.SearchResponse, error) SearchTraceQLWithRange(query string, start int64, end int64) (*tempopb.SearchResponse, error) + SearchTraceQLWithRangeAndLimit(query string, start int64, end int64, limit int64, spss int64) (*tempopb.SearchResponse, error) MetricsSummary(query string, groupBy string, start int64, end int64) (*tempopb.SpanMetricsSummaryResponse, error) GetOverrides() (*userconfigurableoverrides.Limits, string, error) SetOverrides(limits *userconfigurableoverrides.Limits, version string) (string, error) @@ -64,6 +65,7 @@ type Client struct { BaseURL string OrgID string client *http.Client + headers map[string]string } func New(baseURL, orgID string) *Client { @@ -80,6 +82,13 @@ func NewWithCompression(baseURL, orgID string) *Client { return c } +func (c *Client) SetHeader(key string, value string) { + if c.headers == nil { + c.headers = make(map[string]string) + } + c.headers[key] = value +} + func (c *Client) WithTransport(t http.RoundTripper) { c.client.Transport = t } @@ -129,6 +138,12 @@ func (c *Client) doRequest(req *http.Request) (*http.Response, []byte, error) { req.Header.Set(orgIDHeader, c.OrgID) } + if c.headers != nil { + for k, v := range c.headers { + req.Header.Set(k, v) + } + } + resp, err := http.DefaultClient.Do(req) if err != nil { return nil, nil, fmt.Errorf("error querying Tempo %v", err) @@ -225,7 +240,7 @@ func (c *Client) SearchTagValuesV2WithRange(tag string, start int64, end int64) // Search Tempo. 
tags must be in logfmt format, that is "key1=value1 key2=value2" func (c *Client) Search(tags string) (*tempopb.SearchResponse, error) { m := &tempopb.SearchResponse{} - _, err := c.getFor(c.buildSearchQueryURL("tags", tags, 0, 0), m) + _, err := c.getFor(c.buildSearchQueryURL("tags", tags, 0, 0, 0, 0), m) if err != nil { return nil, err } @@ -237,7 +252,7 @@ func (c *Client) Search(tags string) (*tempopb.SearchResponse, error) { // epoch timestamps in seconds. func (c *Client) SearchWithRange(tags string, start int64, end int64) (*tempopb.SearchResponse, error) { m := &tempopb.SearchResponse{} - _, err := c.getFor(c.buildSearchQueryURL("tags", tags, start, end), m) + _, err := c.getFor(c.buildSearchQueryURL("tags", tags, start, end, 0, 0), m) if err != nil { return nil, err } @@ -291,7 +306,7 @@ func (c *Client) QueryTraceWithRange(id string, start int64, end int64) (*tempop func (c *Client) SearchTraceQL(query string) (*tempopb.SearchResponse, error) { m := &tempopb.SearchResponse{} - _, err := c.getFor(c.buildSearchQueryURL("q", query, 0, 0), m) + _, err := c.getFor(c.buildSearchQueryURL("q", query, 0, 0, 0, 0), m) if err != nil { return nil, err } @@ -301,7 +316,17 @@ func (c *Client) SearchTraceQL(query string) (*tempopb.SearchResponse, error) { func (c *Client) SearchTraceQLWithRange(query string, start int64, end int64) (*tempopb.SearchResponse, error) { m := &tempopb.SearchResponse{} - _, err := c.getFor(c.buildSearchQueryURL("q", query, start, end), m) + _, err := c.getFor(c.buildSearchQueryURL("q", query, start, end, 0, 0), m) + if err != nil { + return nil, err + } + + return m, nil +} + +func (c *Client) SearchTraceQLWithRangeAndLimit(query string, start int64, end int64, limit int64, spss int64) (*tempopb.SearchResponse, error) { + m := &tempopb.SearchResponse{} + _, err := c.getFor(c.buildSearchQueryURL("q", query, start, end, limit, spss), m) if err != nil { return nil, err } @@ -329,13 +354,19 @@ func (c *Client) MetricsSummary(query string, groupBy 
string, start int64, end i return m, nil } -func (c *Client) buildSearchQueryURL(queryType string, query string, start int64, end int64) string { +func (c *Client) buildSearchQueryURL(queryType string, query string, start int64, end int64, limit int64, spss int64) string { joinURL, _ := url.Parse(c.BaseURL + "/api/search?") q := joinURL.Query() if start != 0 && end != 0 { q.Set("start", strconv.FormatInt(start, 10)) q.Set("end", strconv.FormatInt(end, 10)) } + if limit != 0 { + q.Set("limit", strconv.FormatInt(limit, 10)) + } + if spss != 0 { + q.Set("spss", strconv.FormatInt(spss, 10)) + } q.Set(queryType, query) joinURL.RawQuery = q.Encode() diff --git a/pkg/parquetquery/predicates.gen.go b/pkg/parquetquery/predicates.gen.go index aec9e0da5ec..b324a5e31c6 100644 --- a/pkg/parquetquery/predicates.gen.go +++ b/pkg/parquetquery/predicates.gen.go @@ -991,11 +991,30 @@ func (p ByteEqualPredicate) KeepColumnChunk(c *ColumnChunkHelper) bool { if d := c.Dictionary(); d != nil { return keepDictionary(d, p.KeepValue) } + ci, err := c.ColumnIndex() + if err == nil && ci != nil { + for i := 0; i < ci.NumPages(); i++ { + min := ci.MinValue(i).ByteArray() + max := ci.MaxValue(i).ByteArray() + + if bytes.Compare(p.value, min) >= 0 && bytes.Compare(p.value, max) <= 0 { + return true + } + } + return false + } return true } func (p ByteEqualPredicate) KeepPage(page pq.Page) bool { + minV, maxV, ok := page.Bounds() + if ok { + min := minV.ByteArray() + max := maxV.ByteArray() + + return bytes.Compare(p.value, min) >= 0 && bytes.Compare(p.value, max) <= 0 + } return true } diff --git a/pkg/parquetquerygen/predicates.go b/pkg/parquetquerygen/predicates.go index 748432ec491..1a74540c1e4 100644 --- a/pkg/parquetquerygen/predicates.go +++ b/pkg/parquetquerygen/predicates.go @@ -252,7 +252,7 @@ func (p {{ $structName }}) KeepValue(v pq.Value) bool { { Op: "Equal", CompareCond: `bytes.Equal(bytes.TrimLeft(vv, "\x00"), p.value)`, - RangeCond: "", // benchmarks are generally better w/o a 
range condition? "bytes.Compare(p.value, min) >= 0 && bytes.Compare(p.value, max) <= 0", + RangeCond: "bytes.Compare(p.value, min) >= 0 && bytes.Compare(p.value, max) <= 0", }, { Op: "NotEqual", diff --git a/pkg/tempopb/tempo.pb.go b/pkg/tempopb/tempo.pb.go index 065003baac0..dd3cd274a52 100644 --- a/pkg/tempopb/tempo.pb.go +++ b/pkg/tempopb/tempo.pb.go @@ -1379,7 +1379,8 @@ func (m *SearchTagValuesBlockRequest) GetDedicatedColumns() []*DedicatedColumn { } type SearchTagsResponse struct { - TagNames []string `protobuf:"bytes,1,rep,name=tagNames,proto3" json:"tagNames,omitempty"` + TagNames []string `protobuf:"bytes,1,rep,name=tagNames,proto3" json:"tagNames,omitempty"` + Metrics *MetadataMetrics `protobuf:"bytes,2,opt,name=metrics,proto3" json:"metrics,omitempty"` } func (m *SearchTagsResponse) Reset() { *m = SearchTagsResponse{} } @@ -1422,8 +1423,16 @@ func (m *SearchTagsResponse) GetTagNames() []string { return nil } +func (m *SearchTagsResponse) GetMetrics() *MetadataMetrics { + if m != nil { + return m.Metrics + } + return nil +} + type SearchTagsV2Response struct { - Scopes []*SearchTagsV2Scope `protobuf:"bytes,1,rep,name=scopes,proto3" json:"scopes,omitempty"` + Scopes []*SearchTagsV2Scope `protobuf:"bytes,1,rep,name=scopes,proto3" json:"scopes,omitempty"` + Metrics *MetadataMetrics `protobuf:"bytes,2,opt,name=metrics,proto3" json:"metrics,omitempty"` } func (m *SearchTagsV2Response) Reset() { *m = SearchTagsV2Response{} } @@ -1466,6 +1475,13 @@ func (m *SearchTagsV2Response) GetScopes() []*SearchTagsV2Scope { return nil } +func (m *SearchTagsV2Response) GetMetrics() *MetadataMetrics { + if m != nil { + return m.Metrics + } + return nil +} + type SearchTagsV2Scope struct { Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` Tags []string `protobuf:"bytes,2,rep,name=tags,proto3" json:"tags,omitempty"` @@ -1587,7 +1603,8 @@ func (m *SearchTagValuesRequest) GetEnd() uint32 { } type SearchTagValuesResponse struct { - TagValues []string 
`protobuf:"bytes,1,rep,name=tagValues,proto3" json:"tagValues,omitempty"` + TagValues []string `protobuf:"bytes,1,rep,name=tagValues,proto3" json:"tagValues,omitempty"` + Metrics *MetadataMetrics `protobuf:"bytes,2,opt,name=metrics,proto3" json:"metrics,omitempty"` } func (m *SearchTagValuesResponse) Reset() { *m = SearchTagValuesResponse{} } @@ -1630,6 +1647,13 @@ func (m *SearchTagValuesResponse) GetTagValues() []string { return nil } +func (m *SearchTagValuesResponse) GetMetrics() *MetadataMetrics { + if m != nil { + return m.Metrics + } + return nil +} + type TagValue struct { Type string `protobuf:"bytes,1,opt,name=type,proto3" json:"type,omitempty"` Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` @@ -1683,7 +1707,8 @@ func (m *TagValue) GetValue() string { } type SearchTagValuesV2Response struct { - TagValues []*TagValue `protobuf:"bytes,1,rep,name=tagValues,proto3" json:"tagValues,omitempty"` + TagValues []*TagValue `protobuf:"bytes,1,rep,name=tagValues,proto3" json:"tagValues,omitempty"` + Metrics *MetadataMetrics `protobuf:"bytes,2,opt,name=metrics,proto3" json:"metrics,omitempty"` } func (m *SearchTagValuesV2Response) Reset() { *m = SearchTagValuesV2Response{} } @@ -1726,6 +1751,89 @@ func (m *SearchTagValuesV2Response) GetTagValues() []*TagValue { return nil } +func (m *SearchTagValuesV2Response) GetMetrics() *MetadataMetrics { + if m != nil { + return m.Metrics + } + return nil +} + +type MetadataMetrics struct { + InspectedBytes uint64 `protobuf:"varint,1,opt,name=inspectedBytes,proto3" json:"inspectedBytes,omitempty"` + TotalJobs uint32 `protobuf:"varint,2,opt,name=totalJobs,proto3" json:"totalJobs,omitempty"` + CompletedJobs uint32 `protobuf:"varint,3,opt,name=completedJobs,proto3" json:"completedJobs,omitempty"` + TotalBlocks uint32 `protobuf:"varint,4,opt,name=totalBlocks,proto3" json:"totalBlocks,omitempty"` + TotalBlockBytes uint64 `protobuf:"varint,5,opt,name=totalBlockBytes,proto3" 
json:"totalBlockBytes,omitempty"` +} + +func (m *MetadataMetrics) Reset() { *m = MetadataMetrics{} } +func (m *MetadataMetrics) String() string { return proto.CompactTextString(m) } +func (*MetadataMetrics) ProtoMessage() {} +func (*MetadataMetrics) Descriptor() ([]byte, []int) { + return fileDescriptor_f22805646f4f62b6, []int{22} +} +func (m *MetadataMetrics) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *MetadataMetrics) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_MetadataMetrics.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *MetadataMetrics) XXX_Merge(src proto.Message) { + xxx_messageInfo_MetadataMetrics.Merge(m, src) +} +func (m *MetadataMetrics) XXX_Size() int { + return m.Size() +} +func (m *MetadataMetrics) XXX_DiscardUnknown() { + xxx_messageInfo_MetadataMetrics.DiscardUnknown(m) +} + +var xxx_messageInfo_MetadataMetrics proto.InternalMessageInfo + +func (m *MetadataMetrics) GetInspectedBytes() uint64 { + if m != nil { + return m.InspectedBytes + } + return 0 +} + +func (m *MetadataMetrics) GetTotalJobs() uint32 { + if m != nil { + return m.TotalJobs + } + return 0 +} + +func (m *MetadataMetrics) GetCompletedJobs() uint32 { + if m != nil { + return m.CompletedJobs + } + return 0 +} + +func (m *MetadataMetrics) GetTotalBlocks() uint32 { + if m != nil { + return m.TotalBlocks + } + return 0 +} + +func (m *MetadataMetrics) GetTotalBlockBytes() uint64 { + if m != nil { + return m.TotalBlockBytes + } + return 0 +} + type Trace struct { ResourceSpans []*v11.ResourceSpans `protobuf:"bytes,1,rep,name=resourceSpans,proto3" json:"resourceSpans,omitempty"` } @@ -1734,7 +1842,7 @@ func (m *Trace) Reset() { *m = Trace{} } func (m *Trace) String() string { return proto.CompactTextString(m) } func (*Trace) ProtoMessage() {} func (*Trace) Descriptor() ([]byte, []int) 
{ - return fileDescriptor_f22805646f4f62b6, []int{22} + return fileDescriptor_f22805646f4f62b6, []int{23} } func (m *Trace) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1779,7 +1887,7 @@ func (m *PushResponse) Reset() { *m = PushResponse{} } func (m *PushResponse) String() string { return proto.CompactTextString(m) } func (*PushResponse) ProtoMessage() {} func (*PushResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{23} + return fileDescriptor_f22805646f4f62b6, []int{24} } func (m *PushResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1831,7 +1939,7 @@ func (m *PushBytesRequest) Reset() { *m = PushBytesRequest{} } func (m *PushBytesRequest) String() string { return proto.CompactTextString(m) } func (*PushBytesRequest) ProtoMessage() {} func (*PushBytesRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{24} + return fileDescriptor_f22805646f4f62b6, []int{25} } func (m *PushBytesRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1869,7 +1977,7 @@ func (m *PushSpansRequest) Reset() { *m = PushSpansRequest{} } func (m *PushSpansRequest) String() string { return proto.CompactTextString(m) } func (*PushSpansRequest) ProtoMessage() {} func (*PushSpansRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{25} + return fileDescriptor_f22805646f4f62b6, []int{26} } func (m *PushSpansRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1914,7 +2022,7 @@ func (m *TraceBytes) Reset() { *m = TraceBytes{} } func (m *TraceBytes) String() string { return proto.CompactTextString(m) } func (*TraceBytes) ProtoMessage() {} func (*TraceBytes) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{26} + return fileDescriptor_f22805646f4f62b6, []int{27} } func (m *TraceBytes) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1962,7 +2070,7 @@ func (m *LinkSlice) Reset() { *m = LinkSlice{} } 
func (m *LinkSlice) String() string { return proto.CompactTextString(m) } func (*LinkSlice) ProtoMessage() {} func (*LinkSlice) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{27} + return fileDescriptor_f22805646f4f62b6, []int{28} } func (m *LinkSlice) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2010,7 +2118,7 @@ func (m *SpanMetricsRequest) Reset() { *m = SpanMetricsRequest{} } func (m *SpanMetricsRequest) String() string { return proto.CompactTextString(m) } func (*SpanMetricsRequest) ProtoMessage() {} func (*SpanMetricsRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{28} + return fileDescriptor_f22805646f4f62b6, []int{29} } func (m *SpanMetricsRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2086,7 +2194,7 @@ func (m *SpanMetricsSummaryRequest) Reset() { *m = SpanMetricsSummaryReq func (m *SpanMetricsSummaryRequest) String() string { return proto.CompactTextString(m) } func (*SpanMetricsSummaryRequest) ProtoMessage() {} func (*SpanMetricsSummaryRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{29} + return fileDescriptor_f22805646f4f62b6, []int{30} } func (m *SpanMetricsSummaryRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2161,7 +2269,7 @@ func (m *SpanMetricsResponse) Reset() { *m = SpanMetricsResponse{} } func (m *SpanMetricsResponse) String() string { return proto.CompactTextString(m) } func (*SpanMetricsResponse) ProtoMessage() {} func (*SpanMetricsResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{30} + return fileDescriptor_f22805646f4f62b6, []int{31} } func (m *SpanMetricsResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2227,7 +2335,7 @@ func (m *RawHistogram) Reset() { *m = RawHistogram{} } func (m *RawHistogram) String() string { return proto.CompactTextString(m) } func (*RawHistogram) ProtoMessage() {} func (*RawHistogram) Descriptor() 
([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{31} + return fileDescriptor_f22805646f4f62b6, []int{32} } func (m *RawHistogram) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2279,7 +2387,7 @@ func (m *KeyValue) Reset() { *m = KeyValue{} } func (m *KeyValue) String() string { return proto.CompactTextString(m) } func (*KeyValue) ProtoMessage() {} func (*KeyValue) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{32} + return fileDescriptor_f22805646f4f62b6, []int{33} } func (m *KeyValue) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2332,7 +2440,7 @@ func (m *SpanMetrics) Reset() { *m = SpanMetrics{} } func (m *SpanMetrics) String() string { return proto.CompactTextString(m) } func (*SpanMetrics) ProtoMessage() {} func (*SpanMetrics) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{33} + return fileDescriptor_f22805646f4f62b6, []int{34} } func (m *SpanMetrics) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2396,7 +2504,7 @@ func (m *SpanMetricsSummary) Reset() { *m = SpanMetricsSummary{} } func (m *SpanMetricsSummary) String() string { return proto.CompactTextString(m) } func (*SpanMetricsSummary) ProtoMessage() {} func (*SpanMetricsSummary) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{34} + return fileDescriptor_f22805646f4f62b6, []int{35} } func (m *SpanMetricsSummary) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2482,7 +2590,7 @@ func (m *SpanMetricsSummaryResponse) Reset() { *m = SpanMetricsSummaryRe func (m *SpanMetricsSummaryResponse) String() string { return proto.CompactTextString(m) } func (*SpanMetricsSummaryResponse) ProtoMessage() {} func (*SpanMetricsSummaryResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{35} + return fileDescriptor_f22805646f4f62b6, []int{36} } func (m *SpanMetricsSummaryResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ 
-2533,7 +2641,7 @@ func (m *TraceQLStatic) Reset() { *m = TraceQLStatic{} } func (m *TraceQLStatic) String() string { return proto.CompactTextString(m) } func (*TraceQLStatic) ProtoMessage() {} func (*TraceQLStatic) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{36} + return fileDescriptor_f22805646f4f62b6, []int{37} } func (m *TraceQLStatic) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2627,7 +2735,7 @@ func (m *SpanMetricsData) Reset() { *m = SpanMetricsData{} } func (m *SpanMetricsData) String() string { return proto.CompactTextString(m) } func (*SpanMetricsData) ProtoMessage() {} func (*SpanMetricsData) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{37} + return fileDescriptor_f22805646f4f62b6, []int{38} } func (m *SpanMetricsData) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2680,7 +2788,7 @@ func (m *SpanMetricsResult) Reset() { *m = SpanMetricsResult{} } func (m *SpanMetricsResult) String() string { return proto.CompactTextString(m) } func (*SpanMetricsResult) ProtoMessage() {} func (*SpanMetricsResult) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{38} + return fileDescriptor_f22805646f4f62b6, []int{39} } func (m *SpanMetricsResult) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2741,7 +2849,7 @@ func (m *SpanMetricsResultPoint) Reset() { *m = SpanMetricsResultPoint{} func (m *SpanMetricsResultPoint) String() string { return proto.CompactTextString(m) } func (*SpanMetricsResultPoint) ProtoMessage() {} func (*SpanMetricsResultPoint) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{39} + return fileDescriptor_f22805646f4f62b6, []int{40} } func (m *SpanMetricsResultPoint) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2808,7 +2916,7 @@ func (m *QueryInstantRequest) Reset() { *m = QueryInstantRequest{} } func (m *QueryInstantRequest) String() string { return proto.CompactTextString(m) } 
func (*QueryInstantRequest) ProtoMessage() {} func (*QueryInstantRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{40} + return fileDescriptor_f22805646f4f62b6, []int{41} } func (m *QueryInstantRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2867,7 +2975,7 @@ func (m *QueryInstantResponse) Reset() { *m = QueryInstantResponse{} } func (m *QueryInstantResponse) String() string { return proto.CompactTextString(m) } func (*QueryInstantResponse) ProtoMessage() {} func (*QueryInstantResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{41} + return fileDescriptor_f22805646f4f62b6, []int{42} } func (m *QueryInstantResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2923,7 +3031,7 @@ func (m *InstantSeries) Reset() { *m = InstantSeries{} } func (m *InstantSeries) String() string { return proto.CompactTextString(m) } func (*InstantSeries) ProtoMessage() {} func (*InstantSeries) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{42} + return fileDescriptor_f22805646f4f62b6, []int{43} } func (m *InstantSeries) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2998,7 +3106,7 @@ func (m *QueryRangeRequest) Reset() { *m = QueryRangeRequest{} } func (m *QueryRangeRequest) String() string { return proto.CompactTextString(m) } func (*QueryRangeRequest) ProtoMessage() {} func (*QueryRangeRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{43} + return fileDescriptor_f22805646f4f62b6, []int{44} } func (m *QueryRangeRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -3134,7 +3242,7 @@ func (m *QueryRangeResponse) Reset() { *m = QueryRangeResponse{} } func (m *QueryRangeResponse) String() string { return proto.CompactTextString(m) } func (*QueryRangeResponse) ProtoMessage() {} func (*QueryRangeResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{44} + return 
fileDescriptor_f22805646f4f62b6, []int{45} } func (m *QueryRangeResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -3188,7 +3296,7 @@ func (m *Exemplar) Reset() { *m = Exemplar{} } func (m *Exemplar) String() string { return proto.CompactTextString(m) } func (*Exemplar) ProtoMessage() {} func (*Exemplar) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{45} + return fileDescriptor_f22805646f4f62b6, []int{46} } func (m *Exemplar) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -3248,7 +3356,7 @@ func (m *Sample) Reset() { *m = Sample{} } func (m *Sample) String() string { return proto.CompactTextString(m) } func (*Sample) ProtoMessage() {} func (*Sample) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{46} + return fileDescriptor_f22805646f4f62b6, []int{47} } func (m *Sample) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -3308,7 +3416,7 @@ func (m *TimeSeries) Reset() { *m = TimeSeries{} } func (m *TimeSeries) String() string { return proto.CompactTextString(m) } func (*TimeSeries) ProtoMessage() {} func (*TimeSeries) Descriptor() ([]byte, []int) { - return fileDescriptor_f22805646f4f62b6, []int{47} + return fileDescriptor_f22805646f4f62b6, []int{48} } func (m *TimeSeries) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -3394,6 +3502,7 @@ func init() { proto.RegisterType((*SearchTagValuesResponse)(nil), "tempopb.SearchTagValuesResponse") proto.RegisterType((*TagValue)(nil), "tempopb.TagValue") proto.RegisterType((*SearchTagValuesV2Response)(nil), "tempopb.SearchTagValuesV2Response") + proto.RegisterType((*MetadataMetrics)(nil), "tempopb.MetadataMetrics") proto.RegisterType((*Trace)(nil), "tempopb.Trace") proto.RegisterType((*PushResponse)(nil), "tempopb.PushResponse") proto.RegisterType((*PushBytesRequest)(nil), "tempopb.PushBytesRequest") @@ -3425,184 +3534,187 @@ func init() { func init() { proto.RegisterFile("pkg/tempopb/tempo.proto", 
fileDescriptor_f22805646f4f62b6) } var fileDescriptor_f22805646f4f62b6 = []byte{ - // 2818 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xec, 0x1a, 0x4d, 0x6f, 0x1b, 0xc7, - 0x55, 0x2b, 0x7e, 0x3f, 0x92, 0x12, 0x35, 0x56, 0x14, 0x9a, 0x76, 0x64, 0x65, 0x63, 0xa4, 0x6a, - 0x3e, 0x28, 0x99, 0xb1, 0x91, 0x38, 0x69, 0x53, 0x48, 0x16, 0xeb, 0x28, 0xd1, 0x57, 0x86, 0x8c, - 0x12, 0x14, 0x01, 0x84, 0x15, 0x39, 0xa6, 0x17, 0x22, 0x77, 0x99, 0xdd, 0xa5, 0x62, 0xf5, 0x50, - 0xa0, 0x05, 0x7a, 0x28, 0xd0, 0x43, 0x0f, 0xed, 0xa1, 0xc7, 0x9e, 0x8a, 0x9e, 0x7a, 0x68, 0xff, - 0x41, 0x81, 0x22, 0x40, 0xd1, 0x20, 0x40, 0x2f, 0x41, 0x0f, 0x41, 0x91, 0x1c, 0xda, 0x9f, 0x51, - 0xcc, 0x9b, 0x99, 0xdd, 0x59, 0x72, 0x29, 0xc5, 0x8d, 0x82, 0xe6, 0x90, 0x93, 0xe6, 0xbd, 0x7d, - 0xf3, 0xe6, 0xcd, 0xfb, 0x7e, 0x43, 0xc1, 0x93, 0xc3, 0x93, 0xde, 0x5a, 0xc0, 0x06, 0x43, 0x77, - 0x78, 0x2c, 0xfe, 0xd6, 0x87, 0x9e, 0x1b, 0xb8, 0x24, 0x27, 0x91, 0xb5, 0xa5, 0x8e, 0x3b, 0x18, - 0xb8, 0xce, 0xda, 0xe9, 0xad, 0x35, 0xb1, 0x12, 0x04, 0xb5, 0x17, 0x7b, 0x76, 0xf0, 0x70, 0x74, - 0x5c, 0xef, 0xb8, 0x83, 0xb5, 0x9e, 0xdb, 0x73, 0xd7, 0x10, 0x7d, 0x3c, 0x7a, 0x80, 0x10, 0x02, - 0xb8, 0x92, 0xe4, 0x8b, 0x81, 0x67, 0x75, 0x18, 0xe7, 0x82, 0x0b, 0x81, 0x35, 0xff, 0x6c, 0x40, - 0xa5, 0xcd, 0xe1, 0xcd, 0xb3, 0xed, 0x2d, 0xca, 0x3e, 0x18, 0x31, 0x3f, 0x20, 0x55, 0xc8, 0x21, - 0xcd, 0xf6, 0x56, 0xd5, 0x58, 0x31, 0x56, 0x4b, 0x54, 0x81, 0x64, 0x19, 0xe0, 0xb8, 0xef, 0x76, - 0x4e, 0x5a, 0x81, 0xe5, 0x05, 0xd5, 0xd9, 0x15, 0x63, 0xb5, 0x40, 0x35, 0x0c, 0xa9, 0x41, 0x1e, - 0xa1, 0xa6, 0xd3, 0xad, 0xa6, 0xf0, 0x6b, 0x08, 0x93, 0xeb, 0x50, 0xf8, 0x60, 0xc4, 0xbc, 0xb3, - 0x5d, 0xb7, 0xcb, 0xaa, 0x19, 0xfc, 0x18, 0x21, 0xc8, 0x0b, 0xb0, 0x60, 0xf5, 0xfb, 0xee, 0x87, - 0x07, 0x96, 0x17, 0xd8, 0x56, 0x1f, 0x65, 0xaa, 0x66, 0x57, 0x8c, 0xd5, 0x3c, 0x9d, 0xfc, 0x60, - 0xfe, 0xc7, 0x80, 0x05, 0x4d, 0x6c, 0x7f, 0xe8, 0x3a, 0x3e, 0x23, 0x37, 0x21, 0x83, 0x82, 0xa2, - 0xd4, 
0xc5, 0xc6, 0x5c, 0x5d, 0xaa, 0xb0, 0x8e, 0xa4, 0x54, 0x7c, 0x24, 0x2f, 0x41, 0x6e, 0xc0, - 0x02, 0xcf, 0xee, 0xf8, 0x78, 0x81, 0x62, 0xe3, 0x6a, 0x9c, 0x8e, 0xb3, 0xdc, 0x15, 0x04, 0x54, - 0x51, 0x92, 0xbb, 0x90, 0xf5, 0x03, 0x2b, 0x18, 0xf9, 0x78, 0xad, 0xb9, 0xc6, 0xd3, 0x93, 0x7b, - 0x94, 0x18, 0xf5, 0x16, 0x12, 0x52, 0xb9, 0x81, 0x6b, 0x73, 0xc0, 0x7c, 0xdf, 0xea, 0xb1, 0x6a, - 0x1a, 0x6f, 0xad, 0x40, 0xf3, 0x19, 0xc8, 0x0a, 0x5a, 0x52, 0x82, 0xfc, 0xbd, 0xfd, 0xdd, 0x83, - 0x9d, 0x66, 0xbb, 0x59, 0x99, 0x21, 0x45, 0xc8, 0x1d, 0x6c, 0xd0, 0xf6, 0xf6, 0xc6, 0x4e, 0xc5, - 0x30, 0x89, 0x66, 0x20, 0x29, 0x96, 0xf9, 0xf1, 0x2c, 0x94, 0x5b, 0xcc, 0xf2, 0x3a, 0x0f, 0x95, - 0xc9, 0x5e, 0x85, 0x74, 0xdb, 0xea, 0xf9, 0x55, 0x63, 0x25, 0xb5, 0x5a, 0x6c, 0xac, 0x84, 0xd2, - 0xc5, 0xa8, 0xea, 0x9c, 0xa4, 0xe9, 0x04, 0xde, 0xd9, 0x66, 0xfa, 0xa3, 0xcf, 0x6e, 0xcc, 0x50, - 0xdc, 0x43, 0x6e, 0x42, 0x79, 0xd7, 0x76, 0xb6, 0x46, 0x9e, 0x15, 0xd8, 0xae, 0xb3, 0x2b, 0xd4, - 0x52, 0xa6, 0x71, 0x24, 0x52, 0x59, 0x8f, 0x34, 0xaa, 0x94, 0xa4, 0xd2, 0x91, 0x64, 0x11, 0x32, - 0x3b, 0xf6, 0xc0, 0x0e, 0xf0, 0xaa, 0x65, 0x2a, 0x00, 0x8e, 0xf5, 0xd1, 0x63, 0x32, 0x02, 0x8b, - 0x00, 0xa9, 0x40, 0x8a, 0x39, 0x5d, 0x34, 0x72, 0x99, 0xf2, 0x25, 0xa7, 0x7b, 0x9b, 0x7b, 0x44, - 0x35, 0x8f, 0x8a, 0x12, 0x00, 0x59, 0x85, 0xf9, 0xd6, 0xd0, 0x72, 0xfc, 0x03, 0xe6, 0xf1, 0xbf, - 0x2d, 0x16, 0x54, 0x0b, 0xb8, 0x67, 0x1c, 0x5d, 0x7b, 0x19, 0x0a, 0xe1, 0x15, 0x39, 0xfb, 0x13, - 0x76, 0x86, 0xbe, 0x50, 0xa0, 0x7c, 0xc9, 0xd9, 0x9f, 0x5a, 0xfd, 0x11, 0x93, 0x8e, 0x2b, 0x80, - 0x57, 0x67, 0x5f, 0x31, 0xcc, 0xbf, 0xa6, 0x80, 0x08, 0x55, 0x6d, 0x72, 0x77, 0x55, 0x5a, 0xbd, - 0x0d, 0x05, 0x5f, 0x29, 0x50, 0x3a, 0xd5, 0x52, 0xb2, 0x6a, 0x69, 0x44, 0xc8, 0x0d, 0x8e, 0x4e, - 0xbf, 0xbd, 0x25, 0x0f, 0x52, 0x20, 0x0f, 0x01, 0xbc, 0xfa, 0x01, 0x77, 0x06, 0xa1, 0xbf, 0x08, - 0xc1, 0x35, 0x3c, 0xb4, 0x7a, 0xcc, 0x6f, 0xbb, 0x82, 0xb5, 0xd4, 0x61, 0x1c, 0xc9, 0x43, 0x8c, - 0x39, 0x1d, 0xb7, 0x6b, 0x3b, 0x3d, 0x19, 
0x45, 0x21, 0xcc, 0x39, 0xd8, 0x4e, 0x97, 0x3d, 0xe2, - 0xec, 0x5a, 0xf6, 0x8f, 0x99, 0xd4, 0x6d, 0x1c, 0x49, 0x4c, 0x28, 0x05, 0x6e, 0x60, 0xf5, 0x29, - 0xeb, 0xb8, 0x5e, 0xd7, 0xaf, 0xe6, 0x90, 0x28, 0x86, 0xe3, 0x34, 0x5d, 0x2b, 0xb0, 0x9a, 0xea, - 0x24, 0x61, 0x90, 0x18, 0x8e, 0xdf, 0xf3, 0x94, 0x79, 0xbe, 0xed, 0x3a, 0x68, 0x8f, 0x02, 0x55, - 0x20, 0x21, 0x90, 0xf6, 0xf9, 0xf1, 0xb0, 0x62, 0xac, 0xa6, 0x29, 0xae, 0x79, 0xea, 0x78, 0xe0, - 0xba, 0x01, 0xf3, 0x50, 0xb0, 0x22, 0x9e, 0xa9, 0x61, 0xc8, 0x16, 0x54, 0xba, 0xac, 0x6b, 0x77, - 0xac, 0x80, 0x75, 0xef, 0xb9, 0xfd, 0xd1, 0xc0, 0xf1, 0xab, 0x25, 0xf4, 0xe6, 0x6a, 0xa8, 0xf2, - 0xad, 0x38, 0x01, 0x9d, 0xd8, 0x61, 0xfe, 0xc5, 0x80, 0xf9, 0x31, 0x2a, 0x72, 0x1b, 0x32, 0x7e, - 0xc7, 0x1d, 0x32, 0x19, 0xba, 0xcb, 0xd3, 0xd8, 0xd5, 0x5b, 0x9c, 0x8a, 0x0a, 0x62, 0x7e, 0x07, - 0xc7, 0x1a, 0x28, 0x5f, 0xc1, 0x35, 0xb9, 0x05, 0xe9, 0xe0, 0x6c, 0x28, 0xf2, 0xcb, 0x5c, 0xe3, - 0xa9, 0xa9, 0x8c, 0xda, 0x67, 0x43, 0x46, 0x91, 0xd4, 0xbc, 0x01, 0x19, 0x64, 0x4b, 0xf2, 0x90, - 0x6e, 0x1d, 0x6c, 0xec, 0x55, 0x66, 0x78, 0xb0, 0xd3, 0x66, 0x6b, 0xff, 0x1d, 0x7a, 0xaf, 0x89, - 0xf1, 0x9d, 0xe6, 0xe4, 0x04, 0x20, 0xdb, 0x6a, 0xd3, 0xed, 0xbd, 0xfb, 0x95, 0x19, 0xf3, 0x11, - 0xcc, 0x29, 0xef, 0x92, 0xa9, 0xed, 0x36, 0x64, 0x31, 0x7b, 0xa9, 0x08, 0xbf, 0x1e, 0xcf, 0x3f, - 0x82, 0x7a, 0x97, 0x05, 0x16, 0xb7, 0x10, 0x95, 0xb4, 0x64, 0x7d, 0x3c, 0xd5, 0x8d, 0x7b, 0xef, - 0x78, 0x9e, 0x33, 0xff, 0x91, 0x82, 0x2b, 0x09, 0x1c, 0xc7, 0x4b, 0x42, 0x21, 0x2a, 0x09, 0xab, - 0x30, 0xef, 0xb9, 0x6e, 0xd0, 0x62, 0xde, 0xa9, 0xdd, 0x61, 0x7b, 0x91, 0xca, 0xc6, 0xd1, 0xdc, - 0x3b, 0x39, 0x0a, 0xd9, 0x23, 0x9d, 0xa8, 0x10, 0x71, 0x24, 0x2f, 0x04, 0x18, 0x12, 0x6d, 0x7b, - 0xc0, 0xde, 0x71, 0xec, 0x47, 0x7b, 0x96, 0xe3, 0x62, 0x24, 0xa4, 0xe9, 0xe4, 0x07, 0xee, 0x55, - 0xdd, 0x28, 0x25, 0x89, 0xf4, 0xa2, 0x61, 0xc8, 0x73, 0x90, 0xf3, 0x65, 0xce, 0xc8, 0xa2, 0x06, - 0x2a, 0x91, 0x06, 0x04, 0x9e, 0x2a, 0x02, 0xf2, 0x02, 0xe4, 0xe5, 0x92, 0xc7, 
0x44, 0x2a, 0x91, - 0x38, 0xa4, 0x20, 0x14, 0x4a, 0xbe, 0xb8, 0x1c, 0xcf, 0xe1, 0x7e, 0x35, 0x8f, 0x3b, 0xea, 0xe7, - 0xd9, 0xa5, 0xde, 0xd2, 0x36, 0x60, 0x92, 0xa2, 0x31, 0x1e, 0xb5, 0x43, 0x58, 0x98, 0x20, 0x49, - 0xc8, 0x63, 0xcf, 0xeb, 0x79, 0xac, 0xd8, 0x78, 0x42, 0x33, 0x6a, 0xb4, 0x59, 0x4f, 0x6f, 0x3b, - 0x50, 0xd2, 0x3f, 0x61, 0x1e, 0x1a, 0x5a, 0xce, 0x3d, 0x77, 0xe4, 0x04, 0xc8, 0x98, 0xe7, 0x21, - 0x85, 0xe0, 0x3a, 0x65, 0x9e, 0xe7, 0x7a, 0xe2, 0xb3, 0x28, 0x06, 0x1a, 0xc6, 0xfc, 0xb9, 0x01, - 0x39, 0xa9, 0x0f, 0xf2, 0x0c, 0x64, 0xf8, 0x46, 0xe5, 0x96, 0xe5, 0x98, 0xc2, 0xa8, 0xf8, 0x86, - 0x15, 0xd0, 0x0a, 0x3a, 0x0f, 0x59, 0x57, 0x72, 0x53, 0x20, 0x79, 0x0d, 0xc0, 0x0a, 0x02, 0xcf, - 0x3e, 0x1e, 0x05, 0x8c, 0x57, 0x14, 0xce, 0xe3, 0x5a, 0xc8, 0x43, 0xb6, 0x3b, 0xa7, 0xb7, 0xea, - 0x6f, 0xb1, 0xb3, 0x43, 0x7e, 0x1b, 0xaa, 0x91, 0xf3, 0x58, 0x4f, 0xf3, 0x63, 0xc8, 0x12, 0x64, - 0xf9, 0x41, 0xa1, 0x6f, 0x4a, 0x28, 0x31, 0x84, 0x13, 0xdd, 0x2b, 0x35, 0xcd, 0xbd, 0x6e, 0x42, - 0x59, 0x39, 0x13, 0x87, 0x7d, 0xe9, 0x88, 0x71, 0xe4, 0xd8, 0x2d, 0x32, 0x8f, 0x77, 0x8b, 0xdf, - 0x86, 0xb5, 0x5c, 0x06, 0x23, 0x8f, 0x28, 0xdb, 0xf1, 0x87, 0xac, 0x13, 0xb0, 0x6e, 0x5b, 0x05, - 0x3d, 0xd6, 0xbb, 0x31, 0x34, 0x79, 0x16, 0xe6, 0x42, 0xd4, 0xe6, 0x19, 0x3f, 0x7c, 0x16, 0xe5, - 0x1b, 0xc3, 0x92, 0x15, 0x28, 0x62, 0x76, 0xc7, 0xe2, 0xa6, 0x2a, 0xb7, 0x8e, 0xe2, 0x17, 0xed, - 0xb8, 0x83, 0x61, 0x9f, 0x05, 0xac, 0xfb, 0xa6, 0x7b, 0xec, 0xab, 0xda, 0x13, 0x43, 0x72, 0xbf, - 0xc1, 0x4d, 0x48, 0x21, 0x82, 0x2d, 0x42, 0x70, 0xb9, 0x23, 0x96, 0x42, 0x9c, 0x2c, 0x8a, 0x33, - 0x8e, 0x8e, 0xc9, 0x8d, 0x35, 0x1c, 0x6b, 0x90, 0x2e, 0x37, 0x62, 0xcd, 0x1e, 0x8f, 0x07, 0xae, - 0x1a, 0x5e, 0xd5, 0x55, 0x51, 0x5e, 0x54, 0xe9, 0x5c, 0x18, 0x5b, 0xa6, 0xeb, 0x45, 0xc8, 0x60, - 0x33, 0xa9, 0x6a, 0x3b, 0x02, 0x51, 0xe3, 0x91, 0x4a, 0x68, 0x3c, 0xd2, 0x61, 0xe3, 0x61, 0x7e, - 0x9c, 0x82, 0xa5, 0xe8, 0xa4, 0x58, 0x0f, 0xf0, 0xca, 0x64, 0x0f, 0x50, 0x1b, 0xcb, 0xa2, 0x9a, - 0x74, 0xdf, 0xf6, 
0x01, 0xdf, 0x8c, 0x3e, 0xe0, 0xd3, 0x14, 0x5c, 0x0b, 0x8d, 0x83, 0x41, 0x17, - 0xb7, 0xea, 0xf7, 0x27, 0xad, 0x7a, 0x63, 0xd2, 0xaa, 0x62, 0xe3, 0xb7, 0xa6, 0xfd, 0x46, 0x99, - 0x76, 0x5d, 0xb5, 0xea, 0x22, 0xec, 0x64, 0x83, 0x54, 0x83, 0x7c, 0x60, 0xf5, 0x78, 0x07, 0x21, - 0x6a, 0x51, 0x81, 0x86, 0xb0, 0xf9, 0x26, 0x2c, 0x46, 0x3b, 0x0e, 0x1b, 0xe1, 0x9e, 0x06, 0x64, - 0x31, 0x79, 0xa8, 0xea, 0x95, 0x14, 0xd7, 0x87, 0x0d, 0xd1, 0x15, 0x4a, 0x4a, 0xf3, 0x35, 0x3d, - 0x25, 0xc9, 0x8f, 0x61, 0xa1, 0x31, 0xb4, 0x42, 0x43, 0x20, 0x1d, 0xf0, 0x89, 0x6c, 0x16, 0x85, - 0xc1, 0xb5, 0x39, 0xd4, 0xb2, 0x4c, 0xcc, 0xb7, 0xb0, 0xbf, 0x12, 0xe2, 0x86, 0xfd, 0x95, 0x00, - 0x2f, 0x4a, 0x6c, 0xe9, 0x84, 0xc4, 0x96, 0x89, 0x12, 0xdb, 0xcb, 0xf0, 0xe4, 0xc4, 0x89, 0xf2, - 0xf6, 0x3c, 0x99, 0x2b, 0xa4, 0x54, 0x59, 0x84, 0x30, 0x6f, 0x43, 0x5e, 0x6d, 0xc1, 0xab, 0x9c, - 0x85, 0x09, 0x17, 0xd7, 0xc9, 0xb3, 0x94, 0xb9, 0x03, 0x57, 0xc7, 0x8e, 0xd3, 0xd4, 0xbd, 0x36, - 0x7e, 0x60, 0xb1, 0xb1, 0x10, 0xb5, 0x4b, 0xf2, 0x8b, 0x2e, 0xc3, 0x1e, 0x64, 0xb0, 0xd0, 0x91, - 0x26, 0x94, 0x3d, 0xe6, 0xbb, 0x23, 0xaf, 0xc3, 0x5a, 0x5a, 0xb7, 0x11, 0x45, 0xac, 0x78, 0xd2, - 0x38, 0xbd, 0x55, 0xa7, 0x3a, 0x19, 0x8d, 0xef, 0x32, 0xf7, 0xa0, 0x74, 0x30, 0xf2, 0xa3, 0xa6, - 0xfa, 0x75, 0x28, 0x63, 0x5b, 0xe3, 0x6f, 0x9e, 0xb5, 0xe5, 0xbb, 0x41, 0x6a, 0x75, 0x4e, 0x73, - 0x46, 0x4e, 0xdd, 0xe4, 0x14, 0x94, 0x59, 0xbe, 0xeb, 0xd0, 0x38, 0xb9, 0xf9, 0x3b, 0x03, 0x2a, - 0x9c, 0x04, 0x8b, 0x9a, 0xb2, 0xe4, 0x8b, 0x61, 0xa7, 0xce, 0x2d, 0x5f, 0xda, 0x7c, 0x82, 0x4f, - 0xda, 0xff, 0xfc, 0xec, 0x46, 0xf9, 0xc0, 0x63, 0x56, 0xbf, 0xef, 0x76, 0x04, 0xb5, 0x6a, 0xd1, - 0xbf, 0x03, 0x29, 0xbb, 0x2b, 0x5a, 0x9f, 0xa9, 0xb4, 0x9c, 0x82, 0xdc, 0x01, 0x10, 0xf9, 0x67, - 0xcb, 0x0a, 0xac, 0x6a, 0xfa, 0x3c, 0x7a, 0x8d, 0xd0, 0xdc, 0x15, 0x22, 0x0a, 0x7d, 0x48, 0x11, - 0xef, 0x42, 0xee, 0x18, 0x1b, 0xb0, 0x2f, 0xad, 0x48, 0x45, 0x6f, 0xde, 0x04, 0x90, 0xaf, 0x11, - 0xbc, 0x8e, 0x2f, 0xc5, 0xa6, 0x92, 0x92, 0xba, 0x94, 
0xf9, 0x3a, 0x14, 0x76, 0x6c, 0xe7, 0xa4, - 0xd5, 0xb7, 0x3b, 0x7c, 0x68, 0xca, 0xf4, 0x6d, 0xe7, 0x44, 0x9d, 0x75, 0x6d, 0xf2, 0x2c, 0x7e, - 0x46, 0x9d, 0x6f, 0xa0, 0x82, 0xd2, 0xfc, 0x99, 0x01, 0x84, 0x23, 0xd5, 0x78, 0x12, 0x55, 0x7e, - 0x11, 0x0a, 0x86, 0x1e, 0x0a, 0x55, 0xc8, 0xf5, 0x3c, 0x77, 0x34, 0xdc, 0x54, 0x21, 0xa2, 0x40, - 0x4e, 0xdf, 0xc7, 0xc7, 0x08, 0xd1, 0xdf, 0x09, 0xe0, 0x4b, 0x87, 0xce, 0x2f, 0x0c, 0xb8, 0xaa, - 0x09, 0xd1, 0x1a, 0x0d, 0x06, 0x96, 0x77, 0xf6, 0xff, 0x91, 0xe5, 0x0f, 0x06, 0x5c, 0x89, 0x29, - 0x24, 0x8a, 0x61, 0xe6, 0x07, 0xf6, 0x80, 0xe7, 0x47, 0x94, 0x24, 0x4f, 0x23, 0x44, 0xbc, 0xcd, - 0x17, 0x9d, 0xa1, 0xd6, 0xe6, 0x3f, 0x0b, 0x73, 0xe8, 0xce, 0xad, 0x90, 0x44, 0x88, 0x36, 0x86, - 0x25, 0xf5, 0x68, 0x88, 0x4c, 0xa3, 0x05, 0x17, 0x63, 0x4d, 0xfe, 0xc4, 0x08, 0xf9, 0x3d, 0x28, - 0x51, 0xeb, 0xc3, 0x37, 0x6c, 0x3f, 0x70, 0x7b, 0x9e, 0x35, 0xe0, 0x4e, 0x72, 0x3c, 0xea, 0x9c, - 0x30, 0x31, 0x69, 0xa4, 0xa9, 0x84, 0xf8, 0xdd, 0x3b, 0x9a, 0x64, 0x02, 0x30, 0xdf, 0x84, 0xbc, - 0x6a, 0x93, 0x13, 0x26, 0x9f, 0x17, 0xe2, 0x93, 0xcf, 0x52, 0x7c, 0xda, 0x7a, 0x7b, 0x87, 0x8f, - 0x37, 0x76, 0x47, 0x65, 0xa3, 0x5f, 0x1b, 0x50, 0xd4, 0x44, 0x24, 0x9b, 0xb0, 0xd0, 0xb7, 0x02, - 0xe6, 0x74, 0xce, 0x8e, 0x1e, 0x2a, 0xf1, 0xa4, 0x57, 0x46, 0x33, 0x94, 0x2e, 0x3b, 0xad, 0x48, - 0xfa, 0xe8, 0x36, 0xdf, 0x85, 0xac, 0xcf, 0x3c, 0x5b, 0x86, 0xb7, 0x9e, 0xc1, 0xc2, 0xee, 0x5e, - 0x12, 0xf0, 0x8b, 0x8b, 0x7c, 0x21, 0x15, 0x2b, 0x21, 0xf3, 0xef, 0x71, 0xef, 0x96, 0x8e, 0x35, - 0x39, 0x94, 0x5d, 0x60, 0xad, 0xd9, 0x44, 0x6b, 0x45, 0xf2, 0xa5, 0x2e, 0x92, 0xaf, 0x02, 0xa9, - 0xe1, 0xdd, 0xbb, 0x72, 0xa4, 0xe1, 0x4b, 0x81, 0xb9, 0x83, 0x8e, 0x87, 0x98, 0x3b, 0x02, 0xb3, - 0x2e, 0xfb, 0x78, 0xbe, 0x44, 0xcc, 0x9d, 0x75, 0xd9, 0xb0, 0xf3, 0xa5, 0xf9, 0x2e, 0xd4, 0x92, - 0xe2, 0x44, 0xba, 0xe8, 0x5d, 0x28, 0xf8, 0x88, 0xb2, 0xd9, 0x64, 0x0a, 0x48, 0xd8, 0x17, 0x51, - 0x9b, 0xbf, 0x31, 0xa0, 0x1c, 0x33, 0x6c, 0xac, 0x12, 0x65, 0x64, 0x25, 0x2a, 0x81, 0xe1, 
0xa0, - 0x32, 0x52, 0xd4, 0x70, 0x38, 0xf4, 0x00, 0xf5, 0x6d, 0x50, 0xe3, 0x01, 0x87, 0x7c, 0xf9, 0xea, - 0x6a, 0xf8, 0x1c, 0x3a, 0xc6, 0xcb, 0xe5, 0xa9, 0x71, 0xcc, 0xa1, 0xae, 0xbc, 0x98, 0xd1, 0xc5, - 0x19, 0x52, 0x3c, 0xf0, 0xe6, 0x90, 0xb7, 0x7a, 0xbd, 0x25, 0x90, 0x3e, 0xb1, 0x9d, 0x2e, 0x76, - 0x47, 0x19, 0x8a, 0x6b, 0x93, 0x89, 0x07, 0x49, 0x29, 0x38, 0x4f, 0xb3, 0xbc, 0xf5, 0xf1, 0x98, - 0x3f, 0xea, 0x07, 0xed, 0xa8, 0x50, 0x6a, 0x18, 0xde, 0x6a, 0x08, 0x48, 0xba, 0x4d, 0x2d, 0x31, - 0x86, 0x90, 0x82, 0x4a, 0x4a, 0x9e, 0x05, 0x17, 0x26, 0xbe, 0x72, 0x37, 0xe9, 0x5b, 0xc7, 0xac, - 0xaf, 0xf5, 0x0a, 0x11, 0x82, 0xcb, 0x81, 0xc0, 0xa1, 0x56, 0x9b, 0x35, 0x0c, 0x59, 0x83, 0xd9, - 0x40, 0xb9, 0xc6, 0x8d, 0xe9, 0x32, 0x1c, 0xb8, 0xb6, 0x13, 0xd0, 0xd9, 0xc0, 0xe7, 0x31, 0xb4, - 0x94, 0xfc, 0x19, 0x8d, 0x61, 0x4b, 0x21, 0xca, 0x14, 0xd7, 0xdc, 0x3b, 0x4e, 0xad, 0x3e, 0x1e, - 0x6c, 0x50, 0xbe, 0xe4, 0x53, 0x21, 0x7b, 0xc4, 0x06, 0xc3, 0xbe, 0xe5, 0xb5, 0xe5, 0x0b, 0x52, - 0x0a, 0x7f, 0x54, 0x18, 0x47, 0x93, 0xe7, 0xa0, 0xa2, 0x50, 0xea, 0x45, 0x59, 0x3a, 0xe7, 0x04, - 0xde, 0x6c, 0xc1, 0x15, 0x7c, 0x1c, 0xde, 0x76, 0xfc, 0xc0, 0x72, 0x82, 0xf3, 0xb3, 0x72, 0x98, - 0x65, 0x65, 0xa6, 0x89, 0x65, 0x59, 0x11, 0x9b, 0x98, 0x65, 0x1f, 0xc1, 0x62, 0x9c, 0xa9, 0x74, - 0xe1, 0x7a, 0x18, 0x53, 0xc2, 0x7f, 0xa3, 0xb4, 0x23, 0x29, 0x5b, 0xf8, 0x35, 0x0c, 0xac, 0xc7, - 0x7f, 0x76, 0xfb, 0xa9, 0x01, 0xe5, 0x18, 0x2f, 0x72, 0x17, 0xb2, 0x68, 0xb6, 0xc9, 0x98, 0x99, - 0x7c, 0x4f, 0x90, 0xaf, 0xf9, 0x72, 0x43, 0xbc, 0x35, 0x33, 0x64, 0x32, 0x24, 0x37, 0xa0, 0x38, - 0xf4, 0xdc, 0xc1, 0x91, 0xe4, 0x2a, 0xde, 0xde, 0x80, 0xa3, 0x76, 0x10, 0x63, 0xfe, 0x31, 0x05, - 0x0b, 0x78, 0x7d, 0x6a, 0x39, 0x3d, 0x76, 0x29, 0x1a, 0xc5, 0x29, 0x21, 0x60, 0x43, 0x69, 0x46, - 0x5c, 0xc7, 0x7f, 0x07, 0xca, 0x8d, 0xff, 0x0e, 0xa4, 0x4d, 0x56, 0xf9, 0x73, 0x26, 0xab, 0xc2, - 0x85, 0x93, 0x15, 0x24, 0x4d, 0x56, 0xda, 0x3c, 0x53, 0x8c, 0xcf, 0x33, 0xfa, 0xcc, 0x55, 0x1a, - 0x9b, 0xb9, 0xd4, 0xac, 0x53, 
0x9e, 0x3a, 0xeb, 0xcc, 0x7d, 0xa9, 0x59, 0x67, 0xfe, 0x71, 0x67, - 0x1d, 0xac, 0xef, 0xd2, 0xf5, 0xfd, 0x6a, 0x45, 0xdc, 0x39, 0x44, 0x98, 0x3e, 0x10, 0xdd, 0x60, - 0xd2, 0x5b, 0x9f, 0x1f, 0xf3, 0xd6, 0x2b, 0x51, 0x91, 0xb4, 0x07, 0xec, 0x2b, 0xbb, 0xea, 0x4f, - 0x20, 0xdf, 0x94, 0x12, 0x5c, 0xbe, 0x93, 0x3e, 0x0d, 0x25, 0x9e, 0x46, 0xfc, 0xc0, 0x1a, 0x0c, - 0x8f, 0x06, 0xc2, 0x4b, 0x53, 0xb4, 0x18, 0xe2, 0x76, 0x7d, 0x73, 0x03, 0xb2, 0x2d, 0x6b, 0x30, - 0xec, 0x4f, 0x12, 0xcf, 0x4e, 0x10, 0x47, 0xa7, 0x18, 0xda, 0x29, 0xe6, 0x27, 0x06, 0x40, 0xa4, - 0x8b, 0xaf, 0x72, 0x8b, 0x35, 0xc8, 0xf9, 0x28, 0x8c, 0x6a, 0x07, 0xe6, 0x23, 0xf5, 0x21, 0x5e, - 0xd2, 0x2b, 0xaa, 0x0b, 0xa3, 0x90, 0xdc, 0xd1, 0x2d, 0x9e, 0x1e, 0x2b, 0xe1, 0x4a, 0xf1, 0x92, - 0x6b, 0x44, 0xf9, 0xdc, 0xfb, 0x30, 0x3f, 0x36, 0xac, 0x90, 0x12, 0xe4, 0xf7, 0xf6, 0x8f, 0x9a, - 0x94, 0xee, 0xd3, 0xca, 0x0c, 0xb9, 0x02, 0xf3, 0xbb, 0x1b, 0xef, 0x1d, 0xed, 0x6c, 0x1f, 0x36, - 0x8f, 0xda, 0x74, 0xe3, 0x5e, 0xb3, 0x55, 0x31, 0x38, 0x12, 0xd7, 0x47, 0xed, 0xfd, 0xfd, 0xa3, - 0x9d, 0x0d, 0x7a, 0xbf, 0x59, 0x99, 0x25, 0x0b, 0x50, 0x7e, 0x67, 0xef, 0xad, 0xbd, 0xfd, 0x77, - 0xf7, 0xe4, 0xe6, 0x54, 0xe3, 0x97, 0x06, 0x64, 0x39, 0x7b, 0xe6, 0x91, 0x1f, 0x40, 0x21, 0x1c, - 0x79, 0xc8, 0xd5, 0xd8, 0xa4, 0xa4, 0x8f, 0x41, 0xb5, 0x27, 0x62, 0x9f, 0x94, 0x73, 0x9a, 0x33, - 0x64, 0x03, 0x8a, 0x21, 0xf1, 0x61, 0xe3, 0x7f, 0x61, 0xd1, 0xf8, 0xb7, 0x01, 0x15, 0xe9, 0x97, - 0xf7, 0x99, 0xc3, 0x3c, 0x2b, 0x70, 0x43, 0xc1, 0x70, 0x5e, 0x19, 0xe3, 0xaa, 0x0f, 0x3f, 0xd3, - 0x05, 0xdb, 0x06, 0xb8, 0xcf, 0x02, 0xd5, 0x2b, 0x5e, 0x4b, 0x2e, 0x8e, 0x82, 0xc7, 0xf5, 0x29, - 0x95, 0x53, 0xb1, 0xba, 0x0f, 0x10, 0x05, 0x26, 0x89, 0x6a, 0xfd, 0x44, 0x7a, 0xad, 0x5d, 0x4b, - 0xfc, 0x16, 0xde, 0xf4, 0xf7, 0x69, 0xc8, 0xf1, 0x0f, 0x36, 0xf3, 0xc8, 0x1b, 0x50, 0xfe, 0xa1, - 0xed, 0x74, 0xc3, 0x1f, 0x83, 0xc9, 0xd5, 0xa4, 0xdf, 0xa0, 0x05, 0xdb, 0xda, 0xf4, 0x9f, 0xa7, - 0xd1, 0x04, 0x25, 0xf5, 0xf3, 0x52, 0x87, 0x39, 0x01, 0x99, 0xf2, 
0x9b, 0x66, 0xed, 0xc9, 0x09, - 0x7c, 0xc8, 0xa2, 0x09, 0x45, 0xed, 0xf7, 0x52, 0x5d, 0x5b, 0x13, 0xbf, 0xa2, 0x9e, 0xc7, 0xe6, - 0x3e, 0x40, 0xf4, 0x9a, 0x42, 0xce, 0x79, 0x57, 0xad, 0x5d, 0x4b, 0xfc, 0x16, 0x32, 0x7a, 0x4b, - 0x5d, 0x49, 0x3c, 0xcb, 0x9c, 0xcb, 0xea, 0xa9, 0xc4, 0x67, 0x1e, 0x8d, 0xd9, 0x21, 0xcc, 0x8f, - 0xbd, 0x62, 0x90, 0x8b, 0x1e, 0x07, 0x6b, 0x2b, 0xd3, 0x09, 0x42, 0xbe, 0x3f, 0xd2, 0xde, 0x8e, - 0xd4, 0xeb, 0xc8, 0xc5, 0x9c, 0xcd, 0x69, 0x04, 0xba, 0xcc, 0x8d, 0xbf, 0xa5, 0xa1, 0xd2, 0x0a, - 0x3c, 0x66, 0x0d, 0x6c, 0xa7, 0xa7, 0x5c, 0xe6, 0x35, 0xc8, 0xca, 0xc2, 0xf7, 0xb8, 0x26, 0x5e, - 0x37, 0x78, 0x3c, 0x5c, 0x8a, 0x6d, 0xd6, 0x0d, 0xb2, 0x7b, 0x89, 0xd6, 0x59, 0x37, 0xc8, 0x7b, - 0x5f, 0x8f, 0x7d, 0xd6, 0x0d, 0xf2, 0xfe, 0xd7, 0x67, 0xa1, 0x75, 0x83, 0x1c, 0xc0, 0x82, 0xcc, - 0x15, 0x97, 0x92, 0x1d, 0xd6, 0x0d, 0x72, 0x08, 0x57, 0x74, 0x8e, 0xb2, 0x85, 0x24, 0xd7, 0xe3, - 0xfb, 0xe2, 0x4d, 0xb2, 0xa6, 0xe1, 0xa4, 0x6e, 0x97, 0xf3, 0x6d, 0xfc, 0xc9, 0x80, 0x9c, 0xca, - 0x84, 0x47, 0x89, 0xd3, 0xaa, 0x79, 0xde, 0x0c, 0x27, 0x0f, 0x7a, 0xe6, 0x5c, 0x9a, 0x4b, 0xcf, - 0x96, 0x9b, 0xd5, 0x8f, 0x3e, 0x5f, 0x36, 0x3e, 0xf9, 0x7c, 0xd9, 0xf8, 0xd7, 0xe7, 0xcb, 0xc6, - 0xaf, 0xbe, 0x58, 0x9e, 0xf9, 0xe4, 0x8b, 0xe5, 0x99, 0x4f, 0xbf, 0x58, 0x9e, 0x39, 0xce, 0xe2, - 0x7f, 0x3b, 0xbd, 0xf4, 0xdf, 0x00, 0x00, 0x00, 0xff, 0xff, 0xdc, 0xc4, 0xa8, 0xe6, 0x6e, 0x25, - 0x00, 0x00, + // 2876 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xec, 0x5a, 0x4b, 0x6f, 0x23, 0xc7, + 0xf1, 0xd7, 0x88, 0xef, 0x22, 0x29, 0x51, 0xbd, 0xb2, 0xcc, 0xe5, 0xae, 0x25, 0x79, 0xbc, 0xf0, + 0x5f, 0x7f, 0x3f, 0x28, 0x2d, 0xbd, 0x8b, 0x78, 0xed, 0xc4, 0x81, 0xb4, 0x62, 0xd6, 0xb2, 0xf5, + 0x72, 0x93, 0x96, 0x8d, 0xc0, 0x80, 0x30, 0x22, 0x7b, 0xb9, 0x03, 0x91, 0x33, 0xf4, 0xcc, 0x50, + 0x5e, 0xe5, 0x60, 0x24, 0x01, 0x72, 0x08, 0x90, 0x43, 0x0e, 0xc9, 0x21, 0xc7, 0x9c, 0x82, 0x9c, + 0x72, 0x48, 0xbe, 0x41, 0x80, 0xc0, 0x41, 
0x10, 0xc3, 0x40, 0x2e, 0x46, 0x0e, 0x46, 0x60, 0x1f, + 0x92, 0x8f, 0x11, 0x74, 0x75, 0xf7, 0xbc, 0x25, 0x79, 0xbd, 0x32, 0xe2, 0x83, 0x4f, 0xea, 0xae, + 0xf9, 0x75, 0x75, 0x75, 0x55, 0x75, 0x3d, 0x9a, 0x82, 0x27, 0xc7, 0xc7, 0x83, 0x55, 0x8f, 0x8d, + 0xc6, 0xf6, 0xf8, 0x48, 0xfc, 0x6d, 0x8e, 0x1d, 0xdb, 0xb3, 0x49, 0x41, 0x12, 0x1b, 0x0b, 0x3d, + 0x7b, 0x34, 0xb2, 0xad, 0xd5, 0x93, 0x9b, 0xab, 0x62, 0x24, 0x00, 0x8d, 0x17, 0x07, 0xa6, 0xf7, + 0x60, 0x72, 0xd4, 0xec, 0xd9, 0xa3, 0xd5, 0x81, 0x3d, 0xb0, 0x57, 0x91, 0x7c, 0x34, 0xb9, 0x8f, + 0x33, 0x9c, 0xe0, 0x48, 0xc2, 0xe7, 0x3d, 0xc7, 0xe8, 0x31, 0xce, 0x05, 0x07, 0x82, 0xaa, 0xff, + 0x49, 0x83, 0x5a, 0x97, 0xcf, 0x37, 0x4e, 0xb7, 0x36, 0x29, 0x7b, 0x7f, 0xc2, 0x5c, 0x8f, 0xd4, + 0xa1, 0x80, 0x98, 0xad, 0xcd, 0xba, 0xb6, 0xac, 0xad, 0x54, 0xa8, 0x9a, 0x92, 0x45, 0x80, 0xa3, + 0xa1, 0xdd, 0x3b, 0xee, 0x78, 0x86, 0xe3, 0xd5, 0xa7, 0x97, 0xb5, 0x95, 0x12, 0x0d, 0x51, 0x48, + 0x03, 0x8a, 0x38, 0x6b, 0x5b, 0xfd, 0x7a, 0x06, 0xbf, 0xfa, 0x73, 0x72, 0x1d, 0x4a, 0xef, 0x4f, + 0x98, 0x73, 0xba, 0x63, 0xf7, 0x59, 0x3d, 0x87, 0x1f, 0x03, 0x02, 0x79, 0x01, 0xe6, 0x8c, 0xe1, + 0xd0, 0xfe, 0x60, 0xdf, 0x70, 0x3c, 0xd3, 0x18, 0xa2, 0x4c, 0xf5, 0xfc, 0xb2, 0xb6, 0x52, 0xa4, + 0xc9, 0x0f, 0xfa, 0x7f, 0x34, 0x98, 0x0b, 0x89, 0xed, 0x8e, 0x6d, 0xcb, 0x65, 0xe4, 0x06, 0xe4, + 0x50, 0x50, 0x94, 0xba, 0xdc, 0x9a, 0x69, 0x4a, 0x15, 0x36, 0x11, 0x4a, 0xc5, 0x47, 0xf2, 0x12, + 0x14, 0x46, 0xcc, 0x73, 0xcc, 0x9e, 0x8b, 0x07, 0x28, 0xb7, 0xae, 0x46, 0x71, 0x9c, 0xe5, 0x8e, + 0x00, 0x50, 0x85, 0x24, 0x77, 0x20, 0xef, 0x7a, 0x86, 0x37, 0x71, 0xf1, 0x58, 0x33, 0xad, 0xa7, + 0x93, 0x6b, 0x94, 0x18, 0xcd, 0x0e, 0x02, 0xa9, 0x5c, 0xc0, 0xb5, 0x39, 0x62, 0xae, 0x6b, 0x0c, + 0x58, 0x3d, 0x8b, 0xa7, 0x56, 0x53, 0xfd, 0x19, 0xc8, 0x0b, 0x2c, 0xa9, 0x40, 0xf1, 0xee, 0xde, + 0xce, 0xfe, 0x76, 0xbb, 0xdb, 0xae, 0x4d, 0x91, 0x32, 0x14, 0xf6, 0xd7, 0x69, 0x77, 0x6b, 0x7d, + 0xbb, 0xa6, 0xe9, 0x24, 0x64, 0x20, 0x29, 0x96, 0xfe, 0xf1, 0x34, 0x54, 0x3b, 
0xcc, 0x70, 0x7a, + 0x0f, 0x94, 0xc9, 0x5e, 0x81, 0x6c, 0xd7, 0x18, 0xb8, 0x75, 0x6d, 0x39, 0xb3, 0x52, 0x6e, 0x2d, + 0xfb, 0xd2, 0x45, 0x50, 0x4d, 0x0e, 0x69, 0x5b, 0x9e, 0x73, 0xba, 0x91, 0xfd, 0xe8, 0xb3, 0xa5, + 0x29, 0x8a, 0x6b, 0xc8, 0x0d, 0xa8, 0xee, 0x98, 0xd6, 0xe6, 0xc4, 0x31, 0x3c, 0xd3, 0xb6, 0x76, + 0x84, 0x5a, 0xaa, 0x34, 0x4a, 0x44, 0x94, 0xf1, 0x30, 0x84, 0xca, 0x48, 0x54, 0x98, 0x48, 0xe6, + 0x21, 0xb7, 0x6d, 0x8e, 0x4c, 0x0f, 0x8f, 0x5a, 0xa5, 0x62, 0xc2, 0xa9, 0x2e, 0x7a, 0x4c, 0x4e, + 0x50, 0x71, 0x42, 0x6a, 0x90, 0x61, 0x56, 0x1f, 0x8d, 0x5c, 0xa5, 0x7c, 0xc8, 0x71, 0x6f, 0x71, + 0x8f, 0xa8, 0x17, 0x51, 0x51, 0x62, 0x42, 0x56, 0x60, 0xb6, 0x33, 0x36, 0x2c, 0x77, 0x9f, 0x39, + 0xfc, 0x6f, 0x87, 0x79, 0xf5, 0x12, 0xae, 0x89, 0x93, 0x1b, 0xdf, 0x81, 0x92, 0x7f, 0x44, 0xce, + 0xfe, 0x98, 0x9d, 0xa2, 0x2f, 0x94, 0x28, 0x1f, 0x72, 0xf6, 0x27, 0xc6, 0x70, 0xc2, 0xa4, 0xe3, + 0x8a, 0xc9, 0x2b, 0xd3, 0x2f, 0x6b, 0xfa, 0x5f, 0x32, 0x40, 0x84, 0xaa, 0x36, 0xb8, 0xbb, 0x2a, + 0xad, 0xde, 0x82, 0x92, 0xab, 0x14, 0x28, 0x9d, 0x6a, 0x21, 0x5d, 0xb5, 0x34, 0x00, 0x72, 0x83, + 0xa3, 0xd3, 0x6f, 0x6d, 0xca, 0x8d, 0xd4, 0x94, 0x5f, 0x01, 0x3c, 0xfa, 0x3e, 0x77, 0x06, 0xa1, + 0xbf, 0x80, 0xc0, 0x35, 0x3c, 0x36, 0x06, 0xcc, 0xed, 0xda, 0x82, 0xb5, 0xd4, 0x61, 0x94, 0xc8, + 0xaf, 0x18, 0xb3, 0x7a, 0x76, 0xdf, 0xb4, 0x06, 0xf2, 0x16, 0xf9, 0x73, 0xce, 0xc1, 0xb4, 0xfa, + 0xec, 0x21, 0x67, 0xd7, 0x31, 0x7f, 0xc4, 0xa4, 0x6e, 0xa3, 0x44, 0xa2, 0x43, 0xc5, 0xb3, 0x3d, + 0x63, 0x48, 0x59, 0xcf, 0x76, 0xfa, 0x6e, 0xbd, 0x80, 0xa0, 0x08, 0x8d, 0x63, 0xfa, 0x86, 0x67, + 0xb4, 0xd5, 0x4e, 0xc2, 0x20, 0x11, 0x1a, 0x3f, 0xe7, 0x09, 0x73, 0x5c, 0xd3, 0xb6, 0xd0, 0x1e, + 0x25, 0xaa, 0xa6, 0x84, 0x40, 0xd6, 0xe5, 0xdb, 0xc3, 0xb2, 0xb6, 0x92, 0xa5, 0x38, 0xe6, 0xa1, + 0xe3, 0xbe, 0x6d, 0x7b, 0xcc, 0x41, 0xc1, 0xca, 0xb8, 0x67, 0x88, 0x42, 0x36, 0xa1, 0xd6, 0x67, + 0x7d, 0xb3, 0x67, 0x78, 0xac, 0x7f, 0xd7, 0x1e, 0x4e, 0x46, 0x96, 0x5b, 0xaf, 0xa0, 0x37, 0xd7, + 0x7d, 0x95, 0x6f, 
0x46, 0x01, 0x34, 0xb1, 0x42, 0xff, 0xb3, 0x06, 0xb3, 0x31, 0x14, 0xb9, 0x05, + 0x39, 0xb7, 0x67, 0x8f, 0x99, 0xbc, 0xba, 0x8b, 0x67, 0xb1, 0x6b, 0x76, 0x38, 0x8a, 0x0a, 0x30, + 0x3f, 0x83, 0x65, 0x8c, 0x94, 0xaf, 0xe0, 0x98, 0xdc, 0x84, 0xac, 0x77, 0x3a, 0x16, 0xf1, 0x65, + 0xa6, 0xf5, 0xd4, 0x99, 0x8c, 0xba, 0xa7, 0x63, 0x46, 0x11, 0xaa, 0x2f, 0x41, 0x0e, 0xd9, 0x92, + 0x22, 0x64, 0x3b, 0xfb, 0xeb, 0xbb, 0xb5, 0x29, 0x7e, 0xd9, 0x69, 0xbb, 0xb3, 0xf7, 0x36, 0xbd, + 0xdb, 0xc6, 0xfb, 0x9d, 0xe5, 0x70, 0x02, 0x90, 0xef, 0x74, 0xe9, 0xd6, 0xee, 0xbd, 0xda, 0x94, + 0xfe, 0x10, 0x66, 0x94, 0x77, 0xc9, 0xd0, 0x76, 0x0b, 0xf2, 0x18, 0xbd, 0xd4, 0x0d, 0xbf, 0x1e, + 0x8d, 0x3f, 0x02, 0xbd, 0xc3, 0x3c, 0x83, 0x5b, 0x88, 0x4a, 0x2c, 0x59, 0x8b, 0x87, 0xba, 0xb8, + 0xf7, 0xc6, 0xe3, 0x9c, 0xfe, 0x8f, 0x0c, 0x5c, 0x49, 0xe1, 0x18, 0x4f, 0x09, 0xa5, 0x20, 0x25, + 0xac, 0xc0, 0xac, 0x63, 0xdb, 0x5e, 0x87, 0x39, 0x27, 0x66, 0x8f, 0xed, 0x06, 0x2a, 0x8b, 0x93, + 0xb9, 0x77, 0x72, 0x12, 0xb2, 0x47, 0x9c, 0xc8, 0x10, 0x51, 0x22, 0x4f, 0x04, 0x78, 0x25, 0xba, + 0xe6, 0x88, 0xbd, 0x6d, 0x99, 0x0f, 0x77, 0x0d, 0xcb, 0xc6, 0x9b, 0x90, 0xa5, 0xc9, 0x0f, 0xdc, + 0xab, 0xfa, 0x41, 0x48, 0x12, 0xe1, 0x25, 0x44, 0x21, 0xcf, 0x41, 0xc1, 0x95, 0x31, 0x23, 0x8f, + 0x1a, 0xa8, 0x05, 0x1a, 0x10, 0x74, 0xaa, 0x00, 0xe4, 0x05, 0x28, 0xca, 0x21, 0xbf, 0x13, 0x99, + 0x54, 0xb0, 0x8f, 0x20, 0x14, 0x2a, 0xae, 0x38, 0x1c, 0x8f, 0xe1, 0x6e, 0xbd, 0x88, 0x2b, 0x9a, + 0xe7, 0xd9, 0xa5, 0xd9, 0x09, 0x2d, 0xc0, 0x20, 0x45, 0x23, 0x3c, 0x1a, 0x07, 0x30, 0x97, 0x80, + 0xa4, 0xc4, 0xb1, 0xe7, 0xc3, 0x71, 0xac, 0xdc, 0x7a, 0x22, 0x64, 0xd4, 0x60, 0x71, 0x38, 0xbc, + 0x6d, 0x43, 0x25, 0xfc, 0x09, 0xe3, 0xd0, 0xd8, 0xb0, 0xee, 0xda, 0x13, 0xcb, 0x43, 0xc6, 0x3c, + 0x0e, 0x29, 0x02, 0xd7, 0x29, 0x73, 0x1c, 0xdb, 0x11, 0x9f, 0x45, 0x32, 0x08, 0x51, 0xf4, 0x9f, + 0x69, 0x50, 0x90, 0xfa, 0x20, 0xcf, 0x40, 0x8e, 0x2f, 0x54, 0x6e, 0x59, 0x8d, 0x28, 0x8c, 0x8a, + 0x6f, 0x98, 0x01, 0x0d, 0xaf, 0xf7, 0x80, 0xf5, 0x25, 
0x37, 0x35, 0x25, 0xaf, 0x02, 0x18, 0x9e, + 0xe7, 0x98, 0x47, 0x13, 0x8f, 0xf1, 0x8c, 0xc2, 0x79, 0x5c, 0xf3, 0x79, 0xc8, 0x72, 0xe7, 0xe4, + 0x66, 0xf3, 0x4d, 0x76, 0x7a, 0xc0, 0x4f, 0x43, 0x43, 0x70, 0x7e, 0xd7, 0xb3, 0x7c, 0x1b, 0xb2, + 0x00, 0x79, 0xbe, 0x91, 0xef, 0x9b, 0x72, 0x96, 0x7a, 0x85, 0x53, 0xdd, 0x2b, 0x73, 0x96, 0x7b, + 0xdd, 0x80, 0xaa, 0x72, 0x26, 0x3e, 0x77, 0xa5, 0x23, 0x46, 0x89, 0xb1, 0x53, 0xe4, 0x1e, 0xed, + 0x14, 0xbf, 0xf1, 0x73, 0xb9, 0xbc, 0x8c, 0xfc, 0x46, 0x99, 0x96, 0x3b, 0x66, 0x3d, 0x8f, 0xf5, + 0xbb, 0xea, 0xd2, 0x63, 0xbe, 0x8b, 0x91, 0xc9, 0xb3, 0x30, 0xe3, 0x93, 0x36, 0x4e, 0xf9, 0xe6, + 0xd3, 0x28, 0x5f, 0x8c, 0x4a, 0x96, 0xa1, 0x8c, 0xd1, 0x1d, 0x93, 0x9b, 0xca, 0xdc, 0x61, 0x12, + 0x3f, 0x68, 0xcf, 0x1e, 0x8d, 0x87, 0xcc, 0x63, 0xfd, 0x37, 0xec, 0x23, 0x57, 0xe5, 0x9e, 0x08, + 0x91, 0xfb, 0x0d, 0x2e, 0x42, 0x84, 0xb8, 0x6c, 0x01, 0x81, 0xcb, 0x1d, 0xb0, 0x14, 0xe2, 0xe4, + 0x51, 0x9c, 0x38, 0x39, 0x22, 0x37, 0xe6, 0x70, 0xcc, 0x41, 0x61, 0xb9, 0x91, 0xaa, 0x0f, 0xf8, + 0x7d, 0xe0, 0xaa, 0xe1, 0x59, 0x5d, 0x25, 0xe5, 0x79, 0x15, 0xce, 0x85, 0xb1, 0x65, 0xb8, 0x9e, + 0x87, 0x1c, 0x16, 0x93, 0x2a, 0xb7, 0xe3, 0x24, 0x28, 0x3c, 0x32, 0x29, 0x85, 0x47, 0xd6, 0x2f, + 0x3c, 0xf4, 0x8f, 0x33, 0xb0, 0x10, 0xec, 0x14, 0xa9, 0x01, 0x5e, 0x4e, 0xd6, 0x00, 0x8d, 0x58, + 0x14, 0x0d, 0x49, 0xf7, 0x6d, 0x1d, 0xf0, 0xcd, 0xa8, 0x03, 0x3e, 0xcd, 0xc0, 0x35, 0xdf, 0x38, + 0x78, 0xe9, 0xa2, 0x56, 0xfd, 0x5e, 0xd2, 0xaa, 0x4b, 0x49, 0xab, 0x8a, 0x85, 0xdf, 0x9a, 0xf6, + 0x1b, 0x65, 0xda, 0xbe, 0x2a, 0xd5, 0xc5, 0xb5, 0x93, 0x05, 0x52, 0x03, 0x8a, 0x9e, 0x31, 0xe0, + 0x15, 0x84, 0xc8, 0x45, 0x25, 0xea, 0xcf, 0x49, 0x2b, 0x5e, 0x06, 0x05, 0xdb, 0xa9, 0xd4, 0x9c, + 0x28, 0x84, 0x3e, 0x84, 0xf9, 0x60, 0x97, 0x83, 0x96, 0xbf, 0x4f, 0x0b, 0xf2, 0x18, 0x70, 0x54, + 0xc6, 0x4b, 0x8b, 0x05, 0x07, 0x2d, 0x51, 0x49, 0x4a, 0xe4, 0x57, 0xda, 0xff, 0xd5, 0x70, 0xe8, + 0x93, 0x0c, 0xfd, 0x84, 0xa6, 0x85, 0x12, 0x1a, 0x81, 0xac, 0xc7, 0x3b, 0xbf, 0x69, 0x3c, 
0x34, + 0x8e, 0xf5, 0x71, 0x28, 0x9a, 0x45, 0x7c, 0x18, 0xeb, 0x38, 0xa1, 0x16, 0xbf, 0x8e, 0x13, 0xd3, + 0x8b, 0x02, 0x68, 0x36, 0x25, 0x80, 0xe6, 0x82, 0x00, 0x7a, 0x0c, 0x4f, 0x26, 0x76, 0x94, 0x1a, + 0xe3, 0x49, 0x43, 0x11, 0xa5, 0x69, 0x02, 0xc2, 0x57, 0xd2, 0xcd, 0x2d, 0x28, 0xaa, 0x6d, 0xf0, + 0xf8, 0xa7, 0x7e, 0x32, 0xc0, 0x71, 0x7a, 0x9f, 0xa7, 0xff, 0x58, 0x83, 0xab, 0x31, 0x19, 0x43, + 0x76, 0x5d, 0x8d, 0x4b, 0x59, 0x6e, 0xcd, 0x05, 0xb5, 0x9c, 0xfc, 0xf2, 0xb8, 0x82, 0xff, 0x55, + 0x83, 0xd9, 0xd8, 0xc7, 0x94, 0x1c, 0xae, 0xa5, 0xe6, 0xf0, 0x48, 0xee, 0x9d, 0x8e, 0xe7, 0xde, + 0x44, 0xfe, 0xce, 0xa4, 0xe5, 0xef, 0x58, 0x1d, 0x90, 0x4d, 0xd6, 0x01, 0x29, 0x39, 0x3c, 0x97, + 0x9a, 0xc3, 0xf5, 0x5d, 0xc8, 0x61, 0x15, 0x42, 0xda, 0x50, 0x75, 0x98, 0x6b, 0x4f, 0x9c, 0x1e, + 0xeb, 0x84, 0x4a, 0xc1, 0x20, 0x9c, 0x8a, 0xf7, 0xa6, 0x93, 0x9b, 0x4d, 0x1a, 0x86, 0xd1, 0xe8, + 0x2a, 0x7d, 0x17, 0x2a, 0xfb, 0x13, 0x37, 0xe8, 0x78, 0x5e, 0x83, 0x2a, 0xd6, 0x9c, 0xee, 0xc6, + 0x69, 0x57, 0x3e, 0xea, 0x64, 0x56, 0x66, 0x42, 0x5a, 0xe6, 0xe8, 0x36, 0x47, 0x50, 0x66, 0xb8, + 0xb6, 0x45, 0xa3, 0x70, 0xfd, 0xb7, 0x1a, 0xd4, 0x38, 0x04, 0xa5, 0x55, 0xee, 0xff, 0xa2, 0xdf, + 0x46, 0xf1, 0xeb, 0x52, 0xd9, 0x78, 0xe2, 0xa3, 0xcf, 0x96, 0xa6, 0xfe, 0xf9, 0xd9, 0x52, 0x75, + 0xdf, 0x61, 0xc6, 0x70, 0x68, 0xf7, 0x04, 0x5a, 0xf5, 0x4f, 0xff, 0x07, 0x19, 0xb3, 0x2f, 0xea, + 0xd2, 0x33, 0xb1, 0x1c, 0x41, 0x6e, 0x03, 0x88, 0xe4, 0xb0, 0x69, 0x78, 0x46, 0x3d, 0x7b, 0x1e, + 0x3e, 0x04, 0xd4, 0x77, 0x84, 0x88, 0x42, 0x1f, 0x52, 0xc4, 0x3b, 0x50, 0x38, 0xc2, 0xea, 0xf8, + 0x4b, 0x2b, 0x52, 0xe1, 0xf5, 0x1b, 0x00, 0xf2, 0xa9, 0x88, 0x3b, 0xcc, 0x42, 0xa4, 0x65, 0xac, + 0xa8, 0x43, 0xe9, 0xaf, 0x41, 0x69, 0xdb, 0xb4, 0x8e, 0x3b, 0x43, 0xb3, 0xc7, 0x3b, 0xda, 0xdc, + 0xd0, 0xb4, 0x8e, 0xd5, 0x5e, 0xd7, 0x92, 0x7b, 0xf1, 0x3d, 0x9a, 0x7c, 0x01, 0x15, 0x48, 0xfd, + 0xa7, 0x1a, 0x10, 0x4e, 0x54, 0xde, 0x1d, 0x94, 0x65, 0x22, 0x7e, 0x68, 0xe1, 0xf8, 0x51, 0x87, + 0xc2, 0xc0, 0xb1, 0x27, 0xe3, 
0x0d, 0x15, 0x57, 0xd4, 0x94, 0xe3, 0x87, 0xf8, 0x52, 0x24, 0x8a, + 0x6f, 0x31, 0xf9, 0xd2, 0xf1, 0xe6, 0xe7, 0xfc, 0x32, 0x07, 0x42, 0x74, 0x26, 0xa3, 0x91, 0xe1, + 0x9c, 0xfe, 0x6f, 0x64, 0xf9, 0xbd, 0x06, 0x57, 0x22, 0x0a, 0x09, 0x02, 0x1f, 0x73, 0x3d, 0x73, + 0xc4, 0x93, 0x17, 0x4a, 0x52, 0xa4, 0x01, 0x21, 0xda, 0x83, 0x89, 0xb2, 0x3d, 0xd4, 0x83, 0x3d, + 0x0b, 0x33, 0xe8, 0xce, 0x1d, 0x1f, 0x22, 0x44, 0x8b, 0x51, 0x49, 0x33, 0x88, 0x42, 0x59, 0xb4, + 0xe0, 0x7c, 0xa4, 0x03, 0x4b, 0x44, 0xa0, 0xef, 0x42, 0x85, 0x1a, 0x1f, 0xbc, 0x6e, 0xba, 0x9e, + 0x3d, 0x70, 0x8c, 0x11, 0x77, 0x92, 0xa3, 0x49, 0xef, 0x98, 0x79, 0x32, 0xea, 0xc8, 0x19, 0x3f, + 0x7b, 0x2f, 0x24, 0x99, 0x98, 0xe8, 0x6f, 0x40, 0x51, 0xf5, 0x30, 0x29, 0x6d, 0xe9, 0x0b, 0xd1, + 0xb6, 0x74, 0x21, 0xda, 0x0a, 0xbf, 0xb5, 0xcd, 0x7b, 0x4f, 0xb3, 0xa7, 0xc2, 0xf1, 0xaf, 0x34, + 0x28, 0x87, 0x44, 0x24, 0x1b, 0x30, 0x37, 0x34, 0x3c, 0x66, 0xf5, 0x4e, 0x0f, 0x1f, 0x28, 0xf1, + 0xa4, 0x57, 0x06, 0x0d, 0x6e, 0x58, 0x76, 0x5a, 0x93, 0xf8, 0xe0, 0x34, 0xff, 0x0f, 0x79, 0x97, + 0x39, 0xa6, 0xbc, 0xde, 0xe1, 0x08, 0xee, 0xb7, 0x5e, 0x12, 0xc0, 0x0f, 0x2e, 0xe2, 0x85, 0x54, + 0xac, 0x9c, 0xe9, 0x7f, 0x8f, 0x7a, 0xb7, 0x74, 0xac, 0x64, 0xc7, 0x7c, 0x81, 0xb5, 0xa6, 0x53, + 0xad, 0x15, 0xc8, 0x97, 0xb9, 0x48, 0xbe, 0x1a, 0x64, 0xc6, 0x77, 0xee, 0xc8, 0x7e, 0x93, 0x0f, + 0x05, 0xe5, 0xb6, 0x0c, 0xc7, 0x7c, 0x28, 0x28, 0x6b, 0xb2, 0xc9, 0xe2, 0x43, 0xa4, 0xdc, 0x5e, + 0x93, 0xdd, 0x14, 0x1f, 0xea, 0xef, 0x40, 0x23, 0xed, 0x9e, 0x48, 0x17, 0xbd, 0x03, 0x25, 0x17, + 0x49, 0x26, 0x4b, 0x86, 0x80, 0x94, 0x75, 0x01, 0x5a, 0xff, 0xb5, 0x06, 0xd5, 0x88, 0x61, 0x23, + 0xa9, 0x38, 0x27, 0x53, 0x71, 0x05, 0x34, 0x0b, 0x95, 0x91, 0xa1, 0x9a, 0xc5, 0x67, 0xf7, 0x51, + 0xdf, 0x1a, 0xd5, 0xee, 0xf3, 0x99, 0x2b, 0x9f, 0xc4, 0x35, 0x97, 0xcf, 0x8e, 0xf0, 0x70, 0x45, + 0xaa, 0x1d, 0xf1, 0x59, 0x5f, 0x1e, 0x4c, 0xeb, 0x63, 0x83, 0x2f, 0x5e, 0xdf, 0x0b, 0xc8, 0x5b, + 0x3d, 0xad, 0x13, 0xc8, 0x1e, 0x9b, 0x56, 0x1f, 0x4b, 0xd7, 0x1c, 
0xc5, 0xb1, 0xce, 0xc4, 0x6b, + 0xb1, 0x14, 0x9c, 0x87, 0x59, 0x5e, 0x97, 0x3a, 0xcc, 0x9d, 0x0c, 0xbd, 0x6e, 0x50, 0x29, 0x84, + 0x28, 0xbc, 0xa6, 0x13, 0x33, 0xe9, 0x36, 0x8d, 0xd4, 0x3b, 0x84, 0x08, 0x2a, 0x91, 0x3c, 0x0a, + 0xce, 0x25, 0xbe, 0x72, 0x37, 0x19, 0x1a, 0x47, 0x6c, 0x18, 0x2a, 0xb0, 0x02, 0x02, 0x97, 0x03, + 0x27, 0x07, 0xa1, 0xe2, 0x24, 0x44, 0x21, 0xab, 0x30, 0xed, 0x29, 0xd7, 0x58, 0x3a, 0x5b, 0x86, + 0x7d, 0xdb, 0xb4, 0x3c, 0x3a, 0xed, 0xb9, 0xfc, 0x0e, 0x2d, 0xa4, 0x7f, 0x46, 0x63, 0x98, 0x52, + 0x88, 0x2a, 0xc5, 0x31, 0xf7, 0x8e, 0x13, 0x63, 0x88, 0x1b, 0x6b, 0x94, 0x0f, 0x79, 0xba, 0x67, + 0x0f, 0xd9, 0x68, 0x3c, 0x34, 0x9c, 0xae, 0x7c, 0xde, 0xcb, 0xe0, 0x2f, 0x3e, 0x71, 0x32, 0x79, + 0x0e, 0x6a, 0x8a, 0xa4, 0x9e, 0xfb, 0xa5, 0x73, 0x26, 0xe8, 0x7a, 0x07, 0xae, 0xe0, 0xcb, 0xfd, + 0x96, 0xe5, 0x7a, 0x86, 0xe5, 0x9d, 0x1f, 0x95, 0xfd, 0x28, 0x2b, 0x23, 0x4d, 0x24, 0xca, 0x8a, + 0xbb, 0x89, 0x51, 0xf6, 0x21, 0xcc, 0x47, 0x99, 0x4a, 0x17, 0x6e, 0xfa, 0x77, 0x4a, 0xf8, 0x6f, + 0x10, 0x76, 0x24, 0xb2, 0x83, 0x5f, 0xfd, 0x8b, 0xf5, 0xe8, 0x6f, 0xa2, 0x3f, 0xd1, 0xa0, 0x1a, + 0xe1, 0x45, 0xee, 0x40, 0x1e, 0xcd, 0x96, 0xbc, 0x33, 0xc9, 0xc7, 0x1e, 0xf9, 0x53, 0x8b, 0x5c, + 0x10, 0xad, 0x4d, 0x35, 0x19, 0x0c, 0xc9, 0x12, 0x94, 0xc7, 0x8e, 0x3d, 0x3a, 0x94, 0x5c, 0xc5, + 0xc3, 0x28, 0x70, 0xd2, 0x36, 0x52, 0xf4, 0x3f, 0x64, 0x60, 0x0e, 0x8f, 0x4f, 0x0d, 0x6b, 0xc0, + 0x2e, 0x45, 0xa3, 0xd8, 0xc2, 0x79, 0x6c, 0x2c, 0xcd, 0x88, 0xe3, 0xe8, 0x8f, 0x74, 0x85, 0xf8, + 0x8f, 0x74, 0xa1, 0xb6, 0xb7, 0x78, 0x4e, 0xdb, 0x5b, 0xba, 0xb0, 0xed, 0x85, 0xb4, 0xb6, 0x37, + 0xd4, 0x6c, 0x96, 0xa3, 0xcd, 0x66, 0xb8, 0x21, 0xae, 0xc4, 0x1a, 0x62, 0xd5, 0x88, 0x56, 0xcf, + 0x6c, 0x44, 0x67, 0xbe, 0x54, 0x23, 0x3a, 0xfb, 0xa8, 0x8d, 0x28, 0xe6, 0x77, 0xe9, 0xfa, 0x6e, + 0xbd, 0x26, 0xce, 0xec, 0x13, 0x74, 0x17, 0x48, 0xd8, 0x60, 0xd2, 0x5b, 0x9f, 0x8f, 0x79, 0xeb, + 0x95, 0x20, 0x49, 0x9a, 0x23, 0xf6, 0xd8, 0xae, 0xfa, 0x21, 0x14, 0xdb, 0x52, 0x82, 0xcb, 0x77, + 0xd2, 
0xa7, 0xa1, 0xc2, 0xc3, 0x88, 0xeb, 0x19, 0xa3, 0xf1, 0xe1, 0x48, 0x78, 0x69, 0x86, 0x96, + 0x7d, 0xda, 0x8e, 0xab, 0xaf, 0x43, 0xbe, 0x63, 0xf0, 0x8e, 0x23, 0x01, 0x9e, 0x4e, 0x80, 0x83, + 0x5d, 0xb4, 0xd0, 0x2e, 0xfa, 0x27, 0x1a, 0x40, 0xa0, 0x8b, 0xc7, 0x39, 0xc5, 0x2a, 0x14, 0x5c, + 0x14, 0x46, 0x95, 0x03, 0xb3, 0x81, 0xfa, 0x90, 0x2e, 0xf1, 0x0a, 0x75, 0xe1, 0x2d, 0x24, 0xb7, + 0xc3, 0x16, 0xcf, 0xc6, 0x52, 0xb8, 0x52, 0xbc, 0xe4, 0x1a, 0x20, 0x9f, 0x7b, 0x0f, 0x66, 0x63, + 0xcd, 0x0a, 0xa9, 0x40, 0x71, 0x77, 0xef, 0xb0, 0x4d, 0xe9, 0x1e, 0xad, 0x4d, 0x91, 0x2b, 0x30, + 0xbb, 0xb3, 0xfe, 0xee, 0xe1, 0xf6, 0xd6, 0x41, 0xfb, 0xb0, 0x4b, 0xd7, 0xef, 0xb6, 0x3b, 0x35, + 0x8d, 0x13, 0x71, 0x7c, 0xd8, 0xdd, 0xdb, 0x3b, 0xdc, 0x5e, 0xa7, 0xf7, 0xda, 0xb5, 0x69, 0x32, + 0x07, 0xd5, 0xb7, 0x77, 0xdf, 0xdc, 0xdd, 0x7b, 0x67, 0x57, 0x2e, 0xce, 0xb4, 0x7e, 0xa1, 0x41, + 0x9e, 0xb3, 0x67, 0x0e, 0xf9, 0x3e, 0x94, 0xfc, 0x96, 0x87, 0x5c, 0x8d, 0x74, 0x4a, 0xe1, 0x36, + 0xa8, 0xf1, 0x44, 0xe4, 0x93, 0x72, 0x4e, 0x7d, 0x8a, 0xac, 0x43, 0xd9, 0x07, 0x1f, 0xb4, 0xbe, + 0x0a, 0x8b, 0xd6, 0xbf, 0x35, 0xa8, 0x49, 0xbf, 0xbc, 0xc7, 0x2c, 0xe6, 0x18, 0x9e, 0xed, 0x0b, + 0x86, 0xfd, 0x4a, 0x8c, 0x6b, 0xb8, 0xf9, 0x39, 0x5b, 0xb0, 0x2d, 0x80, 0x7b, 0xcc, 0x53, 0xb5, + 0xe2, 0xb5, 0xf4, 0xe4, 0x28, 0x78, 0x5c, 0x3f, 0x23, 0x73, 0x2a, 0x56, 0xf7, 0x00, 0x82, 0x8b, + 0x49, 0x82, 0x5c, 0x9f, 0x08, 0xaf, 0x8d, 0x6b, 0xa9, 0xdf, 0xfc, 0x93, 0xfe, 0x2e, 0x0b, 0x05, + 0xfe, 0xc1, 0x64, 0x0e, 0x79, 0x1d, 0xaa, 0x3f, 0x30, 0xad, 0xbe, 0xff, 0x4b, 0x3d, 0xb9, 0x9a, + 0xf6, 0x0f, 0x02, 0x82, 0x6d, 0xe3, 0xec, 0xff, 0x1d, 0x40, 0x13, 0x54, 0xd4, 0x6f, 0x7f, 0x3d, + 0x66, 0x79, 0xe4, 0x8c, 0x1f, 0x9c, 0x1b, 0x4f, 0x26, 0xe8, 0x3e, 0x8b, 0x36, 0x94, 0x43, 0x3f, + 0x66, 0x87, 0xb5, 0x95, 0xf8, 0x89, 0xfb, 0x3c, 0x36, 0xf7, 0x00, 0x82, 0x27, 0x28, 0x72, 0xce, + 0xa3, 0x77, 0xe3, 0x5a, 0xea, 0x37, 0x9f, 0xd1, 0x9b, 0xea, 0x48, 0xe2, 0x2d, 0xeb, 0x5c, 0x56, + 0x4f, 0xa5, 0xbe, 0xa7, 0x85, 0x98, 0x1d, 
0xc0, 0x6c, 0xec, 0x15, 0x87, 0x5c, 0xf4, 0x72, 0xdb, + 0x58, 0x3e, 0x1b, 0xe0, 0xf3, 0xfd, 0x61, 0xe8, 0xc1, 0x4d, 0xbd, 0x0e, 0x5d, 0xcc, 0x59, 0x3f, + 0x0b, 0x10, 0x96, 0xb9, 0xf5, 0xb7, 0x2c, 0xd4, 0x3a, 0x9e, 0xc3, 0x8c, 0x91, 0x69, 0x0d, 0x94, + 0xcb, 0xbc, 0x0a, 0x79, 0x99, 0xf8, 0x1e, 0xd5, 0xc4, 0x6b, 0x1a, 0xbf, 0x0f, 0x97, 0x62, 0x9b, + 0x35, 0x8d, 0xec, 0x5c, 0xa2, 0x75, 0xd6, 0x34, 0xf2, 0xee, 0xd7, 0x63, 0x9f, 0x35, 0x8d, 0xbc, + 0xf7, 0xf5, 0x59, 0x68, 0x4d, 0x23, 0xfb, 0x30, 0x27, 0x63, 0xc5, 0xa5, 0x44, 0x87, 0x35, 0x8d, + 0x1c, 0xc0, 0x95, 0x30, 0x47, 0x59, 0x42, 0x92, 0xeb, 0xd1, 0x75, 0xd1, 0x22, 0x39, 0xa4, 0xe1, + 0xb4, 0x6a, 0x97, 0xf3, 0x6d, 0xfd, 0x51, 0x83, 0x82, 0x8a, 0x84, 0x87, 0xa9, 0xdd, 0xaa, 0x7e, + 0x5e, 0x0f, 0x27, 0x37, 0x7a, 0xe6, 0x5c, 0xcc, 0xa5, 0x47, 0xcb, 0x8d, 0xfa, 0x47, 0x9f, 0x2f, + 0x6a, 0x9f, 0x7c, 0xbe, 0xa8, 0xfd, 0xeb, 0xf3, 0x45, 0xed, 0x97, 0x5f, 0x2c, 0x4e, 0x7d, 0xf2, + 0xc5, 0xe2, 0xd4, 0xa7, 0x5f, 0x2c, 0x4e, 0x1d, 0xe5, 0xf1, 0x5f, 0xd1, 0x5e, 0xfa, 0x6f, 0x00, + 0x00, 0x00, 0xff, 0xff, 0x3d, 0xde, 0x49, 0x19, 0x0b, 0x27, 0x00, 0x00, } // Reference imports to suppress errors if they are not otherwise used. 
@@ -5755,6 +5867,18 @@ func (m *SearchTagsResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.Metrics != nil { + { + size, err := m.Metrics.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintTempo(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x12 + } if len(m.TagNames) > 0 { for iNdEx := len(m.TagNames) - 1; iNdEx >= 0; iNdEx-- { i -= len(m.TagNames[iNdEx]) @@ -5787,6 +5911,18 @@ func (m *SearchTagsV2Response) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.Metrics != nil { + { + size, err := m.Metrics.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintTempo(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x12 + } if len(m.Scopes) > 0 { for iNdEx := len(m.Scopes) - 1; iNdEx >= 0; iNdEx-- { { @@ -5910,6 +6046,18 @@ func (m *SearchTagValuesResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) _ = i var l int _ = l + if m.Metrics != nil { + { + size, err := m.Metrics.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintTempo(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x12 + } if len(m.TagValues) > 0 { for iNdEx := len(m.TagValues) - 1; iNdEx >= 0; iNdEx-- { i -= len(m.TagValues[iNdEx]) @@ -5979,6 +6127,18 @@ func (m *SearchTagValuesV2Response) MarshalToSizedBuffer(dAtA []byte) (int, erro _ = i var l int _ = l + if m.Metrics != nil { + { + size, err := m.Metrics.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintTempo(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x12 + } if len(m.TagValues) > 0 { for iNdEx := len(m.TagValues) - 1; iNdEx >= 0; iNdEx-- { { @@ -5996,6 +6156,54 @@ func (m *SearchTagValuesV2Response) MarshalToSizedBuffer(dAtA []byte) (int, erro return len(dAtA) - i, nil } +func (m *MetadataMetrics) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := 
m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *MetadataMetrics) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *MetadataMetrics) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.TotalBlockBytes != 0 { + i = encodeVarintTempo(dAtA, i, uint64(m.TotalBlockBytes)) + i-- + dAtA[i] = 0x28 + } + if m.TotalBlocks != 0 { + i = encodeVarintTempo(dAtA, i, uint64(m.TotalBlocks)) + i-- + dAtA[i] = 0x20 + } + if m.CompletedJobs != 0 { + i = encodeVarintTempo(dAtA, i, uint64(m.CompletedJobs)) + i-- + dAtA[i] = 0x18 + } + if m.TotalJobs != 0 { + i = encodeVarintTempo(dAtA, i, uint64(m.TotalJobs)) + i-- + dAtA[i] = 0x10 + } + if m.InspectedBytes != 0 { + i = encodeVarintTempo(dAtA, i, uint64(m.InspectedBytes)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + func (m *Trace) Marshal() (dAtA []byte, err error) { size := m.Size() dAtA = make([]byte, size) @@ -6054,20 +6262,20 @@ func (m *PushResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { var l int _ = l if len(m.ErrorsByTrace) > 0 { - dAtA10 := make([]byte, len(m.ErrorsByTrace)*10) - var j9 int + dAtA14 := make([]byte, len(m.ErrorsByTrace)*10) + var j13 int for _, num := range m.ErrorsByTrace { for num >= 1<<7 { - dAtA10[j9] = uint8(uint64(num)&0x7f | 0x80) + dAtA14[j13] = uint8(uint64(num)&0x7f | 0x80) num >>= 7 - j9++ + j13++ } - dAtA10[j9] = uint8(num) - j9++ + dAtA14[j13] = uint8(num) + j13++ } - i -= j9 - copy(dAtA[i:], dAtA10[:j9]) - i = encodeVarintTempo(dAtA, i, uint64(j9)) + i -= j13 + copy(dAtA[i:], dAtA14[:j13]) + i = encodeVarintTempo(dAtA, i, uint64(j13)) i-- dAtA[i] = 0xa } @@ -7800,6 +8008,10 @@ func (m *SearchTagsResponse) Size() (n int) { n += 1 + l + sovTempo(uint64(l)) } } + if m.Metrics != nil { + l = m.Metrics.Size() + n += 1 + l + sovTempo(uint64(l)) + } return n } @@ -7815,6 +8027,10 @@ func (m 
*SearchTagsV2Response) Size() (n int) { n += 1 + l + sovTempo(uint64(l)) } } + if m.Metrics != nil { + l = m.Metrics.Size() + n += 1 + l + sovTempo(uint64(l)) + } return n } @@ -7872,6 +8088,10 @@ func (m *SearchTagValuesResponse) Size() (n int) { n += 1 + l + sovTempo(uint64(l)) } } + if m.Metrics != nil { + l = m.Metrics.Size() + n += 1 + l + sovTempo(uint64(l)) + } return n } @@ -7904,20 +8124,48 @@ func (m *SearchTagValuesV2Response) Size() (n int) { n += 1 + l + sovTempo(uint64(l)) } } + if m.Metrics != nil { + l = m.Metrics.Size() + n += 1 + l + sovTempo(uint64(l)) + } return n } -func (m *Trace) Size() (n int) { +func (m *MetadataMetrics) Size() (n int) { if m == nil { return 0 } var l int _ = l - if len(m.ResourceSpans) > 0 { - for _, e := range m.ResourceSpans { - l = e.Size() - n += 1 + l + sovTempo(uint64(l)) - } + if m.InspectedBytes != 0 { + n += 1 + sovTempo(uint64(m.InspectedBytes)) + } + if m.TotalJobs != 0 { + n += 1 + sovTempo(uint64(m.TotalJobs)) + } + if m.CompletedJobs != 0 { + n += 1 + sovTempo(uint64(m.CompletedJobs)) + } + if m.TotalBlocks != 0 { + n += 1 + sovTempo(uint64(m.TotalBlocks)) + } + if m.TotalBlockBytes != 0 { + n += 1 + sovTempo(uint64(m.TotalBlockBytes)) + } + return n +} + +func (m *Trace) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.ResourceSpans) > 0 { + for _, e := range m.ResourceSpans { + l = e.Size() + n += 1 + l + sovTempo(uint64(l)) + } } return n } @@ -11761,6 +12009,42 @@ func (m *SearchTagsResponse) Unmarshal(dAtA []byte) error { } m.TagNames = append(m.TagNames, string(dAtA[iNdEx:postIndex])) iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Metrics", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen 
< 0 { + return ErrInvalidLengthTempo + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthTempo + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Metrics == nil { + m.Metrics = &MetadataMetrics{} + } + if err := m.Metrics.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipTempo(dAtA[iNdEx:]) @@ -11845,6 +12129,42 @@ func (m *SearchTagsV2Response) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Metrics", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthTempo + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthTempo + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Metrics == nil { + m.Metrics = &MetadataMetrics{} + } + if err := m.Metrics.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipTempo(dAtA[iNdEx:]) @@ -12193,6 +12513,42 @@ func (m *SearchTagValuesResponse) Unmarshal(dAtA []byte) error { } m.TagValues = append(m.TagValues, string(dAtA[iNdEx:postIndex])) iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Metrics", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthTempo + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthTempo + 
} + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Metrics == nil { + m.Metrics = &MetadataMetrics{} + } + if err := m.Metrics.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipTempo(dAtA[iNdEx:]) @@ -12391,6 +12747,187 @@ func (m *SearchTagValuesV2Response) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Metrics", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthTempo + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthTempo + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Metrics == nil { + m.Metrics = &MetadataMetrics{} + } + if err := m.Metrics.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipTempo(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthTempo + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *MetadataMetrics) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: MetadataMetrics: wiretype end 
group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: MetadataMetrics: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field InspectedBytes", wireType) + } + m.InspectedBytes = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.InspectedBytes |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field TotalJobs", wireType) + } + m.TotalJobs = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.TotalJobs |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field CompletedJobs", wireType) + } + m.CompletedJobs = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.CompletedJobs |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field TotalBlocks", wireType) + } + m.TotalBlocks = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.TotalBlocks |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field TotalBlockBytes", wireType) + } + m.TotalBlockBytes = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTempo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := 
dAtA[iNdEx] + iNdEx++ + m.TotalBlockBytes |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := skipTempo(dAtA[iNdEx:]) diff --git a/pkg/tempopb/tempo.proto b/pkg/tempopb/tempo.proto index 5acd1ad612f..1ef043877ca 100644 --- a/pkg/tempopb/tempo.proto +++ b/pkg/tempopb/tempo.proto @@ -200,10 +200,12 @@ message SearchTagValuesBlockRequest { message SearchTagsResponse { repeated string tagNames = 1; + MetadataMetrics metrics = 2; } message SearchTagsV2Response { repeated SearchTagsV2Scope scopes = 1; + MetadataMetrics metrics = 2; } message SearchTagsV2Scope { @@ -220,6 +222,7 @@ message SearchTagValuesRequest { message SearchTagValuesResponse { repeated string tagValues = 1; + MetadataMetrics metrics = 2; } message TagValue { @@ -229,6 +232,15 @@ message TagValue { message SearchTagValuesV2Response { repeated TagValue tagValues = 1; + MetadataMetrics metrics = 2; +} + +message MetadataMetrics { + uint64 inspectedBytes = 1; + uint32 totalJobs = 2; + uint32 completedJobs = 3; + uint32 totalBlocks = 4; + uint64 totalBlockBytes = 5; } message Trace { diff --git a/pkg/traceql/ast.go b/pkg/traceql/ast.go index 67d6770ff14..da18815c08c 100644 --- a/pkg/traceql/ast.go +++ b/pkg/traceql/ast.go @@ -402,6 +402,8 @@ type BinaryOperation struct { RHS FieldExpression compiledExpression *regexp.Regexp + + b branchOptimizer } func newBinaryOperation(op Operator, lhs, rhs FieldExpression) FieldExpression { @@ -417,6 +419,10 @@ func newBinaryOperation(op Operator, lhs, rhs FieldExpression) FieldExpression { } } + if (op == OpAnd || op == OpOr) && binop.referencesSpan() { + binop.b = newBranchPredictor(2, 1000) + } + return binop } diff --git a/pkg/traceql/ast_execute.go b/pkg/traceql/ast_execute.go index 464c0264109..0ee6ad0c87c 100644 --- a/pkg/traceql/ast_execute.go +++ b/pkg/traceql/ast_execute.go @@ -325,30 +325,50 @@ func (a Aggregate) evaluate(input []*Spanset) (output []*Spanset, err error) { } func (o *BinaryOperation) execute(span 
Span) (Static, error) { + recording := o.b.Recording + if recording { + o.b.Start() + } + lhs, err := o.LHS.execute(span) if err != nil { return NewStaticNil(), err } + if recording { + o.b.Finish(leftBranch) + } + // Look for cases where we don't even need to evalulate the RHS - if lhsB, ok := lhs.Bool(); ok { - if o.Op == OpAnd && !lhsB { - // x && y - // x is false so we don't need to evalulate y - return StaticFalse, nil - } - if o.Op == OpOr && lhsB { - // x || y - // x is true so we don't need to evalulate y - return StaticTrue, nil + // But wait until we have enough samples so we can optimize + if !recording { + if lhsB, ok := lhs.Bool(); ok { + if o.Op == OpAnd && !lhsB { + // x && y + // x is false so we don't need to evalulate y + return StaticFalse, nil + } + if o.Op == OpOr && lhsB { + // x || y + // x is true so we don't need to evalulate y + return StaticTrue, nil + } } } + if recording { + o.b.Start() + } + rhs, err := o.RHS.execute(span) if err != nil { return NewStaticNil(), err } + if recording { + o.b.Finish(rightBranch) + } + // Ensure the resolved types are still valid lhsT := lhs.Type rhsT := rhs.Type @@ -428,6 +448,37 @@ func (o *BinaryOperation) execute(span Span) (Static, error) { lhsB, _ := lhs.Bool() rhsB, _ := rhs.Bool() + if recording { + switch o.Op { + case OpAnd: + if !lhsB { + // Record cost of wasted rhs execution + o.b.Penalize(rightBranch) + } + if !rhsB { + // Record cost of wasted lhs execution + o.b.Penalize(leftBranch) + } + case OpOr: + if rhsB { + // Record cost of wasted lhs execution + o.b.Penalize(rightBranch) + } + if lhsB { + // Record cost of wasated rhs execution + o.b.Penalize(leftBranch) + } + } + + if done := o.b.Sampled(); done { + if o.b.OptimalBranch() == rightBranch { + // RHS is the optimal starting branch, + // so swap the elements now. 
+ o.LHS, o.RHS = o.RHS, o.LHS + } + } + } + switch o.Op { case OpAnd: return NewStaticBool(lhsB && rhsB), nil diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go index e03aa2d6388..e3288d7e64b 100644 --- a/pkg/traceql/engine.go +++ b/pkg/traceql/engine.go @@ -27,7 +27,7 @@ func NewEngine() *Engine { return &Engine{} } -func (e *Engine) Compile(query string) (*RootExpr, SpansetFilterFunc, metricsFirstStageElement, *FetchSpansRequest, error) { +func Compile(query string) (*RootExpr, SpansetFilterFunc, metricsFirstStageElement, *FetchSpansRequest, error) { expr, err := Parse(query) if err != nil { return nil, nil, nil, nil, err @@ -50,12 +50,13 @@ func (e *Engine) ExecuteSearch(ctx context.Context, searchReq *tempopb.SearchReq ctx, span := tracer.Start(ctx, "traceql.Engine.ExecuteSearch") defer span.End() - rootExpr, err := e.parseQuery(searchReq) + rootExpr, _, _, fetchSpansRequest, err := Compile(searchReq.Query) if err != nil { return nil, err } - fetchSpansRequest := e.createFetchSpansRequest(searchReq, rootExpr.Pipeline) + fetchSpansRequest.StartTimeUnixNanos = unixSecToNano(searchReq.Start) + fetchSpansRequest.EndTimeUnixNanos = unixSecToNano(searchReq.End) span.SetAttributes(attribute.String("pipeline", rootExpr.Pipeline.String())) span.SetAttributes(attribute.String("fetchSpansRequest", fmt.Sprint(fetchSpansRequest))) @@ -99,7 +100,7 @@ func (e *Engine) ExecuteSearch(ctx context.Context, searchReq *tempopb.SearchReq return evalSS, nil } - fetchSpansResponse, err := spanSetFetcher.Fetch(ctx, fetchSpansRequest) + fetchSpansResponse, err := spanSetFetcher.Fetch(ctx, *fetchSpansRequest) if err != nil { return nil, err } @@ -204,30 +205,6 @@ func (e *Engine) ExecuteTagNames( return fetcher.Fetch(ctx, autocompleteReq, cb) } -func (e *Engine) parseQuery(searchReq *tempopb.SearchRequest) (*RootExpr, error) { - r, err := Parse(searchReq.Query) - if err != nil { - return nil, err - } - return r, r.validate() -} - -// createFetchSpansRequest will flatten the 
SpansetFilter in simple conditions the storage layer -// can work with. -func (e *Engine) createFetchSpansRequest(searchReq *tempopb.SearchRequest, pipeline Pipeline) FetchSpansRequest { - // TODO handle SearchRequest.MinDurationMs and MaxDurationMs, this refers to the trace level duration which is not the same as the intrinsic duration - - req := FetchSpansRequest{ - StartTimeUnixNanos: unixSecToNano(searchReq.Start), - EndTimeUnixNanos: unixSecToNano(searchReq.End), - Conditions: nil, - AllConditions: true, - } - - pipeline.extractConditions(&req) - return req -} - func (e *Engine) createAutocompleteRequest(tag Attribute, pipeline Pipeline) FetchTagValuesRequest { req := FetchSpansRequest{ Conditions: nil, diff --git a/pkg/traceql/engine_metrics.go b/pkg/traceql/engine_metrics.go index 7c34f51bb8e..3aba0a74f6c 100644 --- a/pkg/traceql/engine_metrics.go +++ b/pkg/traceql/engine_metrics.go @@ -776,7 +776,7 @@ func (e *Engine) CompileMetricsQueryRangeNonRaw(req *tempopb.QueryRangeRequest, return nil, fmt.Errorf("step required") } - _, _, metricsPipeline, _, err := e.Compile(req.Query) + _, _, metricsPipeline, _, err := Compile(req.Query) if err != nil { return nil, fmt.Errorf("compiling query: %w", err) } @@ -810,7 +810,7 @@ func (e *Engine) CompileMetricsQueryRange(req *tempopb.QueryRangeRequest, exempl return nil, fmt.Errorf("step required") } - expr, eval, metricsPipeline, storageReq, err := e.Compile(req.Query) + expr, eval, metricsPipeline, storageReq, err := Compile(req.Query) if err != nil { return nil, fmt.Errorf("compiling query: %w", err) } diff --git a/pkg/traceql/engine_test.go b/pkg/traceql/engine_test.go index 2721b87d409..f7f0599b4e4 100644 --- a/pkg/traceql/engine_test.go +++ b/pkg/traceql/engine_test.go @@ -531,31 +531,23 @@ func TestExamplesInEngine(t *testing.T) { err = yaml.Unmarshal(b, queries) require.NoError(t, err) - e := NewEngine() - for _, q := range queries.Valid { t.Run("valid - "+q, func(t *testing.T) { - _, err := 
e.parseQuery(&tempopb.SearchRequest{ - Query: q, - }) + _, _, _, _, err := Compile(q) require.NoError(t, err) }) } for _, q := range queries.ParseFails { t.Run("parse fails - "+q, func(t *testing.T) { - _, err := e.parseQuery(&tempopb.SearchRequest{ - Query: q, - }) + _, _, _, _, err := Compile(q) require.Error(t, err) }) } for _, q := range queries.ValidateFails { t.Run("validate fails - "+q, func(t *testing.T) { - _, err := e.parseQuery(&tempopb.SearchRequest{ - Query: q, - }) + _, _, _, _, err := Compile(q) require.Error(t, err) var unErr *unsupportedError require.False(t, errors.As(err, &unErr)) @@ -564,9 +556,7 @@ func TestExamplesInEngine(t *testing.T) { for _, q := range queries.Unsupported { t.Run("unsupported - "+q, func(t *testing.T) { - _, err := e.parseQuery(&tempopb.SearchRequest{ - Query: q, - }) + _, _, _, _, err := Compile(q) require.Error(t, err) var unErr *unsupportedError require.True(t, errors.As(err, &unErr)) diff --git a/pkg/traceql/util.go b/pkg/traceql/util.go index 10ce8a96d48..90a982d2cd2 100644 --- a/pkg/traceql/util.go +++ b/pkg/traceql/util.go @@ -1,6 +1,8 @@ package traceql import ( + "time" + "github.com/grafana/tempo/pkg/tempopb" "go.opentelemetry.io/otel" ) @@ -81,3 +83,61 @@ func (b *bucketSet) addAndTest(i int) bool { b.buckets[b.sz]++ return false } + +const ( + leftBranch = 0 + rightBranch = 1 +) + +type branchOptimizer struct { + start time.Time + last []time.Duration + totals []time.Duration + Recording bool + samplesRemaining int +} + +func newBranchPredictor(numBranches int, numSamples int) branchOptimizer { + return branchOptimizer{ + totals: make([]time.Duration, numBranches), + last: make([]time.Duration, numBranches), + samplesRemaining: numSamples, + Recording: true, + } +} + +// Start recording. Should be called immediately prior to a branch execution. +func (b *branchOptimizer) Start() { + b.start = time.Now() +} + +// Finish the recording and temporarily save the cost for the given branch number. 
+func (b *branchOptimizer) Finish(branch int) { + b.last[branch] = time.Since(b.start) +} + +// Penalize the given branch using it's previously recorded cost. This is called after +// executing all branches and then knowing in retrospect which ones were not needed. +func (b *branchOptimizer) Penalize(branch int) { + b.totals[branch] += b.last[branch] +} + +// Sampled indicates that a full execution was done and see if we have enough samples. +func (b *branchOptimizer) Sampled() (done bool) { + b.samplesRemaining-- + b.Recording = b.samplesRemaining > 0 + return !b.Recording +} + +// OptimalBranch returns the branch with the least penalized cost over time, i.e. the optimal one to start with. +func (b *branchOptimizer) OptimalBranch() int { + mini := 0 + min := b.totals[0] + for i := 1; i < len(b.totals); i++ { + if b.totals[i] < min { + mini = i + min = b.totals[i] + } + } + return mini +} diff --git a/pkg/traceqlmetrics/metrics.go b/pkg/traceqlmetrics/metrics.go index 3798f6a078a..427fe3a8327 100644 --- a/pkg/traceqlmetrics/metrics.go +++ b/pkg/traceqlmetrics/metrics.go @@ -226,7 +226,7 @@ func GetMetrics(ctx context.Context, query, groupBy string, spanLimit int, start groupByKeys[i] = groupBys[i][0].String() } - _, eval, _, req, err := traceql.NewEngine().Compile(query) + _, eval, _, req, err := traceql.Compile(query) if err != nil { return nil, fmt.Errorf("compiling query: %w", err) } diff --git a/tempodb/backend/compression.go b/tempodb/backend/compression.go new file mode 100644 index 00000000000..dac5085dd5f --- /dev/null +++ b/tempodb/backend/compression.go @@ -0,0 +1,45 @@ +package backend + +import ( + "sync" + + "github.com/klauspost/compress/zstd" +) + +var _ Codec = (*ZstdCodec)(nil) + +type Codec interface { + Encode([]byte, []byte) ([]byte, error) + Decode([]byte) ([]byte, error) +} + +type ZstdCodec struct { + encoders sync.Pool // *zstd.Encoder + decoders sync.Pool // *zstd.Decoder +} + +func (c *ZstdCodec) Encode(src, dst []byte) ([]byte, error) { + 
e, _ := c.encoders.Get().(*zstd.Encoder) + if e == nil { + var err error + e, err = zstd.NewWriter(nil, zstd.WithEncoderConcurrency(1)) + if err != nil { + return nil, err + } + } + defer c.encoders.Put(e) + return e.EncodeAll(src, dst), nil +} + +func (c *ZstdCodec) Decode(buf []byte) ([]byte, error) { + d, _ := c.decoders.Get().(*zstd.Decoder) + if d == nil { + var err error + d, err = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0)) + if err != nil { + return nil, err + } + } + defer c.decoders.Put(d) + return d.DecodeAll(buf, nil) +} diff --git a/tempodb/backend/tenantindex.go b/tempodb/backend/tenantindex.go index dcebcde1acc..a11f1f04f48 100644 --- a/tempodb/backend/tenantindex.go +++ b/tempodb/backend/tenantindex.go @@ -8,14 +8,16 @@ import ( proto "github.com/gogo/protobuf/proto" "github.com/klauspost/compress/gzip" - "github.com/klauspost/compress/zstd" ) const ( internalFilename = "index.json" ) -var _ proto.Message = (*TenantIndex)(nil) +var ( + _ proto.Message = (*TenantIndex)(nil) + Zstd = &ZstdCodec{} +) func newTenantIndex(meta []*BlockMeta, compactedMeta []*CompactedBlockMeta) *TenantIndex { return &TenantIndex{ @@ -63,38 +65,17 @@ func (b *TenantIndex) unmarshal(buffer []byte) error { } func (b *TenantIndex) marshalPb() ([]byte, error) { - buffer := &bytes.Buffer{} - - z, err := zstd.NewWriter(buffer) - if err != nil { - return nil, err - } - pbBytes, err := proto.Marshal(b) if err != nil { return nil, err } - if _, err = z.Write(pbBytes); err != nil { - return nil, err - } - if err = z.Flush(); err != nil { - return nil, err - } - if err = z.Close(); err != nil { - return nil, err - } - - return buffer.Bytes(), nil + buffer := []byte{} + return Zstd.Encode(pbBytes, buffer) } func (b *TenantIndex) unmarshalPb(buffer []byte) error { - decoder, err := zstd.NewReader(nil, zstd.WithDecoderConcurrency(0)) - if err != nil { - return fmt.Errorf("error creating zstd decoder: %w", err) - } - - bb, err := decoder.DecodeAll(buffer, nil) + bb, err := 
Zstd.Decode(buffer) if err != nil { return fmt.Errorf("error decoding zstd: %w", err) } diff --git a/tempodb/backend/tenantindex_benchmark_test.go b/tempodb/backend/tenantindex_benchmark_test.go index 1a48d82caa5..aeb2eff6a99 100644 --- a/tempodb/backend/tenantindex_benchmark_test.go +++ b/tempodb/backend/tenantindex_benchmark_test.go @@ -85,3 +85,44 @@ func BenchmarkIndexUnmarshal(b *testing.B) { _ = unIdx.unmarshal(buf) } } + +func BenchmarkIndexUnmarshalPb(b *testing.B) { + idx := &TenantIndex{ + Meta: []*BlockMeta{ + NewBlockMeta("test", uuid.New(), "v1", EncGZIP, "adsf"), + NewBlockMeta("test", uuid.New(), "v2", EncNone, "adsf"), + NewBlockMeta("test", uuid.New(), "v3", EncLZ4_4M, "adsf"), + }, + CompactedMeta: []*CompactedBlockMeta{ + { + BlockMeta: *NewBlockMeta("test", uuid.New(), "v1", EncGZIP, "adsf"), + CompactedTime: time.Now(), + }, + { + BlockMeta: *NewBlockMeta("test", uuid.New(), "v1", EncZstd, "adsf"), + CompactedTime: time.Now(), + }, + { + BlockMeta: *NewBlockMeta("test", uuid.New(), "v1", EncSnappy, "adsf"), + CompactedTime: time.Now(), + }, + }, + } + + for i := range idx.Meta { + idx.Meta[i].DedicatedColumns = DedicatedColumns{ + {Scope: "resource", Name: "namespace", Type: "string"}, + {Scope: "span", Name: "http.method", Type: "string"}, + {Scope: "span", Name: "namespace", Type: "string"}, + } + } + + buf, err := idx.marshalPb() + require.NoError(b, err) + + unIdx := &TenantIndex{} + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = unIdx.unmarshalPb(buf) + } +} diff --git a/tempodb/backend/test/backend_test.go b/tempodb/backend/test/backend_test.go index fdf4a5d9841..dd883c70f3d 100644 --- a/tempodb/backend/test/backend_test.go +++ b/tempodb/backend/test/backend_test.go @@ -126,6 +126,7 @@ func TestOriginalFixtures(t *testing.T) { i, err := r.TenantIndex(ctx, tenant) assert.NoError(t, err) assert.NotNil(t, i) + assert.NotZero(t, i.CreatedAt) assert.Equal(t, 22435, len(i.Meta)) assert.Equal(t, 3264, len(i.CompactedMeta)) diff --git 
a/tempodb/backend/v1.pb.go b/tempodb/backend/v1.pb.go index c0f6be303db..a707ee36282 100644 --- a/tempodb/backend/v1.pb.go +++ b/tempodb/backend/v1.pb.go @@ -217,7 +217,7 @@ func (m *CompactedBlockMeta) GetCompactedTime() time.Time { } type TenantIndex struct { - CreatedAt time.Time `protobuf:"bytes,1,opt,name=created_at,json=createdAt,proto3,stdtime" json:"createdAt"` + CreatedAt time.Time `protobuf:"bytes,1,opt,name=created_at,json=createdAt,proto3,stdtime" json:"created_at"` Meta []*BlockMeta `protobuf:"bytes,2,rep,name=meta,proto3" json:"meta"` CompactedMeta []*CompactedBlockMeta `protobuf:"bytes,3,rep,name=compacted_meta,json=compactedMeta,proto3" json:"compacted"` } @@ -285,57 +285,57 @@ func init() { func init() { proto.RegisterFile("tempodb/backend/v1/v1.proto", fileDescriptor_6bc10ae735c1a340) } var fileDescriptor_6bc10ae735c1a340 = []byte{ - // 787 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xdd, 0x8a, 0xdb, 0x46, - 0x14, 0xb6, 0x76, 0xb7, 0x6b, 0x7b, 0x6c, 0xaf, 0xed, 0x09, 0x01, 0xd5, 0x01, 0x8f, 0x31, 0xbd, - 0x70, 0x21, 0x95, 0xd9, 0x84, 0x14, 0x4a, 0x69, 0xa1, 0xda, 0x6d, 0x21, 0xa5, 0x3f, 0x41, 0xd9, - 0xdc, 0x94, 0x82, 0x18, 0x69, 0xc6, 0x8a, 0x1a, 0x49, 0x63, 0xa4, 0xb1, 0x69, 0xf3, 0x14, 0x79, - 0x9a, 0x3e, 0x43, 0xe8, 0xd5, 0x5e, 0x96, 0x5e, 0x4c, 0x8b, 0xf7, 0x4e, 0x4f, 0x51, 0xe6, 0xe8, - 0xcf, 0x76, 0x28, 0xe9, 0xcd, 0x72, 0xce, 0xf9, 0xce, 0x77, 0xe6, 0x7c, 0xa3, 0xf9, 0xd6, 0xe8, - 0x81, 0xe4, 0xf1, 0x5a, 0x30, 0x6f, 0xe9, 0x51, 0xff, 0x15, 0x4f, 0xd8, 0x72, 0x7b, 0xb9, 0xdc, - 0x5e, 0x5a, 0xeb, 0x54, 0x48, 0x81, 0x51, 0x59, 0xb4, 0xb6, 0x97, 0x13, 0x12, 0x08, 0x11, 0x44, - 0x7c, 0x09, 0x88, 0xb7, 0x59, 0x2d, 0x65, 0x18, 0xf3, 0x4c, 0xd2, 0x78, 0x5d, 0x34, 0x4f, 0x3e, - 0x09, 0x42, 0xf9, 0x72, 0xe3, 0x59, 0xbe, 0x88, 0x97, 0x81, 0x08, 0x44, 0xd3, 0xa9, 0x33, 0x48, - 0x20, 0x2a, 0xda, 0xe7, 0x7f, 0xb4, 0x51, 0xd7, 0x8e, 0x84, 0xff, 0xea, 0x7b, 0x2e, 0x29, 0xfe, - 0x08, 
0xb5, 0xb7, 0x3c, 0xcd, 0x42, 0x91, 0x98, 0xc6, 0xcc, 0x58, 0x74, 0x6d, 0x94, 0x2b, 0x72, - 0xbe, 0x12, 0x69, 0x4c, 0xa5, 0x53, 0x41, 0xf8, 0x0b, 0xd4, 0xf1, 0x34, 0xc5, 0x0d, 0x99, 0x79, - 0x32, 0x33, 0x16, 0x7d, 0x7b, 0xfe, 0x56, 0x91, 0xd6, 0x5f, 0x8a, 0x9c, 0xbd, 0x78, 0xf1, 0xf4, - 0x7a, 0xa7, 0x48, 0x1b, 0x46, 0x3e, 0xbd, 0xce, 0x15, 0x69, 0x7b, 0x45, 0xe8, 0x94, 0x01, 0xc3, - 0x4f, 0x50, 0x57, 0xf2, 0x84, 0x26, 0x52, 0xf3, 0x3f, 0x80, 0x63, 0xcc, 0x9d, 0x22, 0x9d, 0x1b, - 0x28, 0x02, 0xa9, 0x23, 0xcb, 0xd8, 0xa9, 0x22, 0x86, 0x9f, 0x21, 0x94, 0x49, 0x9a, 0x4a, 0x57, - 0x2b, 0x36, 0xcf, 0x67, 0xc6, 0xa2, 0xf7, 0x68, 0x62, 0x15, 0xd7, 0x61, 0x55, 0x22, 0xad, 0x9b, - 0xea, 0x3a, 0xec, 0xfb, 0x7a, 0xa7, 0x5c, 0x91, 0x2e, 0xb0, 0x74, 0xfd, 0xcd, 0xdf, 0xc4, 0x70, - 0x9a, 0x14, 0x7f, 0x8b, 0x3a, 0x3c, 0x61, 0xc5, 0xbc, 0xf6, 0x7b, 0xe7, 0xdd, 0x2b, 0xe7, 0xb5, - 0x79, 0xc2, 0xea, 0x69, 0x55, 0x82, 0x9f, 0xa0, 0x81, 0x14, 0x92, 0x46, 0xae, 0xf0, 0x7e, 0xe1, - 0xbe, 0xcc, 0xcc, 0xce, 0xcc, 0x58, 0x9c, 0xda, 0xa3, 0x5c, 0x91, 0x3e, 0x00, 0x3f, 0x16, 0x75, - 0xe7, 0x20, 0xc3, 0x18, 0x9d, 0x65, 0xe1, 0x6b, 0x6e, 0x76, 0x67, 0xc6, 0xe2, 0xcc, 0x81, 0x18, - 0x7f, 0x89, 0x46, 0xbe, 0x88, 0xd7, 0xd4, 0x97, 0xa1, 0x48, 0xdc, 0x88, 0x6f, 0x79, 0x64, 0xa2, - 0x99, 0xb1, 0x18, 0xd8, 0xf7, 0x72, 0x45, 0x86, 0x0d, 0xf6, 0x9d, 0x86, 0x9c, 0xe3, 0x02, 0x7e, - 0xa8, 0x65, 0xf9, 0x82, 0x85, 0x49, 0x60, 0xf6, 0xe0, 0xf3, 0x8c, 0xca, 0xcf, 0xd3, 0xf9, 0xba, - 0xac, 0x3b, 0x75, 0x07, 0xfe, 0x0c, 0x0d, 0xc3, 0x84, 0xf1, 0x5f, 0xdd, 0x35, 0x0d, 0xb8, 0x0b, - 0xcb, 0xf4, 0xe1, 0xb0, 0x71, 0xae, 0xc8, 0x00, 0xa0, 0x67, 0x34, 0xe0, 0xcf, 0xc3, 0xd7, 0xdc, - 0x39, 0x4c, 0x1b, 0xcd, 0x29, 0xf7, 0x45, 0xca, 0x32, 0x73, 0x00, 0xc4, 0x46, 0xb3, 0x53, 0xd4, - 0x9d, 0x83, 0x4c, 0xd3, 0x18, 0x95, 0xd4, 0xad, 0x97, 0xbc, 0x80, 0x37, 0x00, 0x34, 0x0d, 0xd4, - 0x4b, 0x1e, 0x64, 0xf8, 0x73, 0x34, 0xf6, 0x22, 0x21, 0x62, 0x37, 0x7b, 0x49, 0x53, 0xe6, 0xfa, - 0x62, 0x93, 0x48, 0x73, 0x08, 0x27, 0x0e, 
0x73, 0x45, 0x7a, 0x00, 0x3e, 0xd7, 0x58, 0xe6, 0x0c, - 0x9b, 0xe4, 0x4a, 0xf7, 0xe1, 0x25, 0xea, 0xad, 0x84, 0x90, 0x3c, 0x2d, 0x14, 0x8e, 0x80, 0x76, - 0x91, 0x2b, 0x82, 0x8a, 0x32, 0xc8, 0xdb, 0x8b, 0xb1, 0x8f, 0xc6, 0x8c, 0xb3, 0xd0, 0xa7, 0x92, - 0xeb, 0xb3, 0xa2, 0x4d, 0x9c, 0x64, 0xe6, 0x18, 0x6e, 0xf3, 0xd3, 0xf2, 0x36, 0x47, 0xd7, 0x55, - 0xc3, 0x55, 0x81, 0xe7, 0x8a, 0x4c, 0xd8, 0x51, 0xed, 0xa1, 0x88, 0x43, 0xed, 0x6d, 0xf9, 0x9b, - 0x33, 0x3a, 0xc6, 0xf0, 0x0f, 0x08, 0xa7, 0x7c, 0x1d, 0xe9, 0xa2, 0xfe, 0xd4, 0x2b, 0xea, 0x4b, - 0x91, 0x9a, 0x18, 0x96, 0x23, 0xb9, 0x22, 0x0f, 0xf6, 0xd0, 0x6f, 0x00, 0xdc, 0x1b, 0x37, 0x7e, - 0x07, 0x9c, 0xff, 0x6e, 0x20, 0x7c, 0x55, 0xbc, 0x06, 0xce, 0x1a, 0x57, 0xdb, 0x08, 0x15, 0x7e, - 0x8d, 0xb9, 0xa4, 0x60, 0xec, 0xde, 0xa3, 0xfb, 0x56, 0xf3, 0x4f, 0xc5, 0xaa, 0x5b, 0xed, 0xbe, - 0xd6, 0x76, 0xab, 0x88, 0x91, 0x2b, 0xd2, 0x72, 0xba, 0x5e, 0x3d, 0xe3, 0x67, 0x74, 0xe1, 0x57, - 0x93, 0x0b, 0xc7, 0x9c, 0xbc, 0xd7, 0x31, 0x1f, 0x96, 0x8e, 0x19, 0xd4, 0xcc, 0xda, 0x37, 0x87, - 0xa5, 0x79, 0x6e, 0xa0, 0x5e, 0x69, 0x7f, 0xfd, 0xc2, 0xb4, 0xd7, 0xfd, 0x94, 0xc3, 0xdd, 0x53, - 0x59, 0x6e, 0xfc, 0xbf, 0xbc, 0x5e, 0xb2, 0xbe, 0x92, 0x85, 0xd7, 0xeb, 0x14, 0x3f, 0x46, 0x67, - 0xa0, 0xfe, 0x64, 0x76, 0xfa, 0xdf, 0xea, 0x3b, 0xb9, 0x22, 0xd0, 0xe6, 0xc0, 0x5f, 0x7c, 0xb3, - 0x2f, 0x1a, 0xe8, 0xa7, 0x40, 0x9f, 0xee, 0xd3, 0xdf, 0xbd, 0x70, 0x7b, 0x00, 0xab, 0x54, 0xf5, - 0x3d, 0xb1, 0x80, 0x7e, 0xfc, 0x76, 0x37, 0x35, 0x6e, 0x77, 0x53, 0xe3, 0x9f, 0xdd, 0xd4, 0x78, - 0x73, 0x37, 0x6d, 0xdd, 0xde, 0x4d, 0x5b, 0x7f, 0xde, 0x4d, 0x5b, 0x3f, 0x0d, 0x8f, 0x7e, 0x05, - 0xbc, 0x73, 0xd0, 0xfa, 0xf8, 0xdf, 0x00, 0x00, 0x00, 0xff, 0xff, 0x97, 0x8d, 0x86, 0x48, 0x1f, - 0x06, 0x00, 0x00, + // 793 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x55, 0x5f, 0x8f, 0xdb, 0x44, + 0x10, 0x8f, 0xef, 0xc2, 0x25, 0xd9, 0x24, 0x97, 0x64, 0xab, 0x22, 0x93, 0x4a, 0xd9, 0x28, 0xe2, + 0x21, 0x48, 
0xc5, 0xd1, 0xb5, 0x2a, 0x12, 0x42, 0x20, 0xe1, 0x3b, 0x90, 0x8a, 0xf8, 0x53, 0xdc, + 0xeb, 0x0b, 0x42, 0xb2, 0xd6, 0xde, 0x8d, 0x6b, 0x6a, 0x7b, 0x23, 0x7b, 0x13, 0x41, 0x3f, 0x45, + 0x3f, 0x0d, 0x9f, 0xa1, 0xe2, 0xe9, 0x1e, 0x11, 0x0f, 0x0b, 0xca, 0xbd, 0x99, 0x2f, 0x81, 0x76, + 0xec, 0xd8, 0x49, 0x2a, 0xda, 0x97, 0x68, 0x66, 0x7e, 0xf3, 0x9b, 0x99, 0xdf, 0xd8, 0xe3, 0xa0, + 0x7b, 0x92, 0xc7, 0x2b, 0xc1, 0xbc, 0x85, 0x47, 0xfd, 0x17, 0x3c, 0x61, 0x8b, 0xcd, 0xc5, 0x62, + 0x73, 0x61, 0xad, 0x52, 0x21, 0x05, 0x46, 0x65, 0xd0, 0xda, 0x5c, 0x8c, 0x49, 0x20, 0x44, 0x10, + 0xf1, 0x05, 0x20, 0xde, 0x7a, 0xb9, 0x90, 0x61, 0xcc, 0x33, 0x49, 0xe3, 0x55, 0x91, 0x3c, 0xfe, + 0x38, 0x08, 0xe5, 0xf3, 0xb5, 0x67, 0xf9, 0x22, 0x5e, 0x04, 0x22, 0x10, 0x75, 0xa6, 0xf6, 0xc0, + 0x01, 0xab, 0x48, 0x9f, 0xfd, 0xd1, 0x42, 0x1d, 0x3b, 0x12, 0xfe, 0x8b, 0xef, 0xb8, 0xa4, 0xf8, + 0x43, 0xd4, 0xda, 0xf0, 0x34, 0x0b, 0x45, 0x62, 0x1a, 0x53, 0x63, 0xde, 0xb1, 0x51, 0xae, 0xc8, + 0xd9, 0x52, 0xa4, 0x31, 0x95, 0xce, 0x0e, 0xc2, 0x9f, 0xa3, 0xb6, 0xa7, 0x29, 0x6e, 0xc8, 0xcc, + 0x93, 0xa9, 0x31, 0xef, 0xd9, 0xb3, 0xd7, 0x8a, 0x34, 0xfe, 0x52, 0xa4, 0xf9, 0xec, 0xd9, 0xe3, + 0xab, 0xad, 0x22, 0x2d, 0x28, 0xf9, 0xf8, 0x2a, 0x57, 0xa4, 0xe5, 0x15, 0xa6, 0x53, 0x1a, 0x0c, + 0x3f, 0x42, 0x1d, 0xc9, 0x13, 0x9a, 0x48, 0xcd, 0x7f, 0x0f, 0xda, 0x98, 0x5b, 0x45, 0xda, 0xd7, + 0x10, 0x04, 0x52, 0x5b, 0x96, 0xb6, 0xb3, 0xb3, 0x18, 0x7e, 0x82, 0x50, 0x26, 0x69, 0x2a, 0x5d, + 0xad, 0xd8, 0x3c, 0x9b, 0x1a, 0xf3, 0xee, 0x83, 0xb1, 0x55, 0xac, 0xc3, 0xda, 0x89, 0xb4, 0xae, + 0x77, 0xeb, 0xb0, 0xef, 0xea, 0x99, 0x72, 0x45, 0x3a, 0xc0, 0xd2, 0xf1, 0x57, 0x7f, 0x13, 0xc3, + 0xa9, 0x5d, 0xfc, 0x0d, 0x6a, 0xf3, 0x84, 0x15, 0xf5, 0x5a, 0xef, 0xac, 0x77, 0xa7, 0xac, 0xd7, + 0xe2, 0x09, 0xab, 0xaa, 0xed, 0x1c, 0xfc, 0x08, 0xf5, 0xa5, 0x90, 0x34, 0x72, 0x85, 0xf7, 0x0b, + 0xf7, 0x65, 0x66, 0xb6, 0xa7, 0xc6, 0xfc, 0xd4, 0x1e, 0xe6, 0x8a, 0xf4, 0x00, 0xf8, 0xa1, 0x88, + 0x3b, 0x07, 0x1e, 0xc6, 0xa8, 0x99, 0x85, 0x2f, 
0xb9, 0xd9, 0x99, 0x1a, 0xf3, 0xa6, 0x03, 0x36, + 0xfe, 0x02, 0x0d, 0x7d, 0x11, 0xaf, 0xa8, 0x2f, 0x43, 0x91, 0xb8, 0x11, 0xdf, 0xf0, 0xc8, 0x44, + 0x53, 0x63, 0xde, 0xb7, 0xef, 0xe4, 0x8a, 0x0c, 0x6a, 0xec, 0x5b, 0x0d, 0x39, 0xc7, 0x01, 0x7c, + 0x5f, 0xcb, 0xf2, 0x05, 0x0b, 0x93, 0xc0, 0xec, 0xc2, 0xe3, 0x19, 0x96, 0x8f, 0xa7, 0xfd, 0x55, + 0x19, 0x77, 0xaa, 0x0c, 0xfc, 0x29, 0x1a, 0x84, 0x09, 0xe3, 0xbf, 0xba, 0x2b, 0x1a, 0x70, 0x17, + 0x86, 0xe9, 0x41, 0xb3, 0x51, 0xae, 0x48, 0x1f, 0xa0, 0x27, 0x34, 0xe0, 0x4f, 0xc3, 0x97, 0xdc, + 0x39, 0x74, 0x6b, 0xcd, 0x29, 0xf7, 0x45, 0xca, 0x32, 0xb3, 0x0f, 0xc4, 0x5a, 0xb3, 0x53, 0xc4, + 0x9d, 0x03, 0x4f, 0xd3, 0x18, 0x95, 0xd4, 0xad, 0x86, 0x3c, 0x87, 0x77, 0x00, 0x68, 0x1a, 0xa8, + 0x86, 0x3c, 0xf0, 0xf0, 0x67, 0x68, 0xe4, 0x45, 0x42, 0xc4, 0x6e, 0xf6, 0x9c, 0xa6, 0xcc, 0xf5, + 0xc5, 0x3a, 0x91, 0xe6, 0x00, 0x3a, 0x0e, 0x72, 0x45, 0xba, 0x00, 0x3e, 0xd5, 0x58, 0xe6, 0x0c, + 0x6a, 0xe7, 0x52, 0xe7, 0xe1, 0x05, 0xea, 0x2e, 0x85, 0x90, 0x3c, 0x2d, 0x14, 0x0e, 0x81, 0x76, + 0x9e, 0x2b, 0x82, 0x8a, 0x30, 0xc8, 0xdb, 0xb3, 0xb1, 0x8f, 0x46, 0x8c, 0xb3, 0xd0, 0xa7, 0x92, + 0xeb, 0x5e, 0xd1, 0x3a, 0x4e, 0x32, 0x73, 0x04, 0xdb, 0xfc, 0xa4, 0xdc, 0xe6, 0xf0, 0x6a, 0x97, + 0x70, 0x59, 0xe0, 0xb9, 0x22, 0x63, 0x76, 0x14, 0xbb, 0x2f, 0xe2, 0x50, 0xdf, 0xb6, 0xfc, 0xcd, + 0x19, 0x1e, 0x63, 0xf8, 0x7b, 0x84, 0x53, 0xbe, 0x8a, 0x74, 0x50, 0x3f, 0xea, 0x25, 0xf5, 0xa5, + 0x48, 0x4d, 0x0c, 0xc3, 0x91, 0x5c, 0x91, 0x7b, 0x7b, 0xe8, 0xd7, 0x00, 0xee, 0x95, 0x1b, 0xbd, + 0x01, 0xce, 0x7e, 0x37, 0x10, 0xbe, 0x2c, 0xde, 0x06, 0xce, 0xea, 0xab, 0xb6, 0x11, 0x2a, 0xee, + 0x35, 0xe6, 0x92, 0xc2, 0x61, 0x77, 0x1f, 0xdc, 0xb5, 0xea, 0x8f, 0x8a, 0x55, 0xa5, 0xda, 0x3d, + 0xad, 0xed, 0x46, 0x11, 0x23, 0x57, 0xa4, 0xe1, 0x74, 0xbc, 0xaa, 0xc6, 0xcf, 0xe8, 0xdc, 0xdf, + 0x55, 0x2e, 0x2e, 0xe6, 0xe4, 0x9d, 0x17, 0xf3, 0x41, 0x79, 0x31, 0xfd, 0x8a, 0x59, 0xdd, 0xcd, + 0x61, 0x68, 0xf6, 0xaf, 0x81, 0xba, 0xe5, 0xf9, 0xeb, 0x37, 0x0c, 0xff, 0x88, 0x90, 
0x9f, 0x72, + 0xd8, 0x3d, 0x95, 0xe5, 0xc4, 0x6f, 0xeb, 0xf4, 0x7e, 0xd9, 0x69, 0x8f, 0x55, 0x1c, 0x7b, 0xe9, + 0x7f, 0x29, 0xf1, 0x43, 0xd4, 0x04, 0xf9, 0x27, 0xd3, 0xd3, 0xff, 0x97, 0xdf, 0xce, 0x15, 0x81, + 0x34, 0x07, 0x7e, 0xf1, 0xf5, 0xbe, 0x6a, 0xa0, 0x9f, 0x02, 0x7d, 0xb2, 0x4f, 0x7f, 0x73, 0xe3, + 0x76, 0x5f, 0x7f, 0x77, 0x2a, 0xe6, 0x9e, 0x5a, 0x40, 0x3f, 0x7a, 0xbd, 0x9d, 0x18, 0x37, 0xdb, + 0x89, 0xf1, 0xcf, 0x76, 0x62, 0xbc, 0xba, 0x9d, 0x34, 0x6e, 0x6e, 0x27, 0x8d, 0x3f, 0x6f, 0x27, + 0x8d, 0x9f, 0x06, 0x47, 0x7f, 0x03, 0xde, 0x19, 0x88, 0x7d, 0xf8, 0x5f, 0x00, 0x00, 0x00, 0xff, + 0xff, 0xaf, 0x58, 0xa6, 0xc0, 0x20, 0x06, 0x00, 0x00, } func (m *BlockMeta) Marshal() (dAtA []byte, err error) { diff --git a/tempodb/backend/v1/v1.proto b/tempodb/backend/v1/v1.proto index 5bdb6b4171b..d6b9fbeca3a 100644 --- a/tempodb/backend/v1/v1.proto +++ b/tempodb/backend/v1/v1.proto @@ -33,7 +33,7 @@ message CompactedBlockMeta { } message TenantIndex { - google.protobuf.Timestamp created_at = 1[(gogoproto.stdtime) = true, (gogoproto.nullable) = false, (gogoproto.jsontag) = "createdAt"]; + google.protobuf.Timestamp created_at = 1[(gogoproto.stdtime) = true, (gogoproto.nullable) = false, (gogoproto.jsontag) = "created_at"]; repeated BlockMeta meta = 2[(gogoproto.jsontag) = "meta"]; repeated CompactedBlockMeta compacted_meta = 3[(gogoproto.jsontag) = "compacted"]; } diff --git a/tempodb/compactor.go b/tempodb/compactor.go index 2b4efb8abaa..69093448cda 100644 --- a/tempodb/compactor.go +++ b/tempodb/compactor.go @@ -198,7 +198,22 @@ func (rw *readerWriter) compact(ctx context.Context, blockMetas []*backend.Block var totalRecords int for _, blockMeta := range blockMetas { - level.Info(rw.logger).Log("msg", "compacting block", "block", fmt.Sprintf("%+v", blockMeta)) + level.Info(rw.logger).Log( + "msg", "compacting block", + "version", blockMeta.Version, + "tenantID", blockMeta.TenantID, + "blockID", blockMeta.BlockID.String(), + "startTime", blockMeta.StartTime.String(), + 
"endTime", blockMeta.EndTime.String(), + "totalObjects", blockMeta.TotalObjects, + "size", blockMeta.Size_, + "compactionLevel", blockMeta.CompactionLevel, + "encoding", blockMeta.Encoding.String(), + "totalRecords", blockMeta.TotalObjects, + "bloomShardCount", blockMeta.BloomShardCount, + "footerSize", blockMeta.FooterSize, + "replicationFactor", blockMeta.ReplicationFactor, + ) totalRecords += int(blockMeta.TotalObjects) // Make sure block still exists @@ -272,7 +287,7 @@ func (rw *readerWriter) compact(ctx context.Context, blockMetas []*backend.Block time.Since(startTime), } for _, meta := range newCompactedBlocks { - logArgs = append(logArgs, "block", fmt.Sprintf("%+v", meta)) + logArgs = append(logArgs, "blockID", meta.BlockID.String()) } level.Info(rw.logger).Log(logArgs...) diff --git a/tempodb/encoding/common/interfaces.go b/tempodb/encoding/common/interfaces.go index 297aa369158..1ce00c99fe5 100644 --- a/tempodb/encoding/common/interfaces.go +++ b/tempodb/encoding/common/interfaces.go @@ -19,17 +19,19 @@ type ( TagsCallback func(t string, scope traceql.AttributeScope) TagValuesCallback func(t string) bool TagValuesCallbackV2 func(traceql.Static) (stop bool) + MetricsCallback func(bytesRead uint64) // callback for accumulating bytesRead ) type Searcher interface { Search(ctx context.Context, req *tempopb.SearchRequest, opts SearchOptions) (*tempopb.SearchResponse, error) - SearchTags(ctx context.Context, scope traceql.AttributeScope, cb TagsCallback, opts SearchOptions) error - SearchTagValues(ctx context.Context, tag string, cb TagValuesCallback, opts SearchOptions) error - SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb TagValuesCallbackV2, opts SearchOptions) error + SearchTags(ctx context.Context, scope traceql.AttributeScope, cb TagsCallback, mcb MetricsCallback, opts SearchOptions) error + SearchTagValues(ctx context.Context, tag string, cb TagValuesCallback, mcb MetricsCallback, opts SearchOptions) error + SearchTagValuesV2(ctx 
context.Context, tag traceql.Attribute, cb TagValuesCallbackV2, mcb MetricsCallback, opts SearchOptions) error + // TODO(suraj): use MetricsCallback in Fetch and remove the Bytes callback from FetchSpansResponse Fetch(context.Context, traceql.FetchSpansRequest, SearchOptions) (traceql.FetchSpansResponse, error) - FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, SearchOptions) error - FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, SearchOptions) error + FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, MetricsCallback, SearchOptions) error + FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, MetricsCallback, SearchOptions) error } type SearchOptions struct { diff --git a/tempodb/encoding/v2/backend_block.go b/tempodb/encoding/v2/backend_block.go index 13995133cc4..99ad1830b25 100644 --- a/tempodb/encoding/v2/backend_block.go +++ b/tempodb/encoding/v2/backend_block.go @@ -152,15 +152,15 @@ func (b *BackendBlock) Search(context.Context, *tempopb.SearchRequest, common.Se return nil, common.ErrUnsupported } -func (b *BackendBlock) SearchTags(context.Context, traceql.AttributeScope, common.TagsCallback, common.SearchOptions) error { +func (b *BackendBlock) SearchTags(context.Context, traceql.AttributeScope, common.TagsCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } -func (b *BackendBlock) SearchTagValues(context.Context, string, common.TagValuesCallback, common.SearchOptions) error { +func (b *BackendBlock) SearchTagValues(context.Context, string, common.TagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } -func (b *BackendBlock) SearchTagValuesV2(context.Context, traceql.Attribute, common.TagValuesCallbackV2, common.SearchOptions) error { +func (b *BackendBlock) SearchTagValuesV2(context.Context, traceql.Attribute, 
common.TagValuesCallbackV2, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } @@ -168,10 +168,10 @@ func (b *BackendBlock) Fetch(context.Context, traceql.FetchSpansRequest, common. return traceql.FetchSpansResponse{}, common.ErrUnsupported } -func (b *BackendBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.SearchOptions) error { +func (b *BackendBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } -func (b *BackendBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.SearchOptions) error { +func (b *BackendBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } diff --git a/tempodb/encoding/v2/wal_block.go b/tempodb/encoding/v2/wal_block.go index df583670871..e128883be79 100644 --- a/tempodb/encoding/v2/wal_block.go +++ b/tempodb/encoding/v2/wal_block.go @@ -241,7 +241,7 @@ func (a *walBlock) Clear() error { return os.Remove(name) } -// Find implements common.Finder +// FindTraceByID Find implements common.Finder func (a *walBlock) FindTraceByID(ctx context.Context, id common.ID, _ common.SearchOptions) (*tempopb.Trace, error) { _, span := tracer.Start(ctx, "v2WalBlock.FindTraceByID") defer span.End() @@ -286,17 +286,17 @@ func (a *walBlock) Search(context.Context, *tempopb.SearchRequest, common.Search return nil, common.ErrUnsupported } -// Search implements common.Searcher -func (a *walBlock) SearchTags(context.Context, traceql.AttributeScope, common.TagsCallback, common.SearchOptions) error { +// SearchTags implements common.Searcher +func (a *walBlock) SearchTags(context.Context, traceql.AttributeScope, common.TagsCallback, common.MetricsCallback, common.SearchOptions) error 
{ return common.ErrUnsupported } // SearchTagValues implements common.Searcher -func (a *walBlock) SearchTagValues(context.Context, string, common.TagValuesCallback, common.SearchOptions) error { +func (a *walBlock) SearchTagValues(context.Context, string, common.TagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } -func (a *walBlock) SearchTagValuesV2(context.Context, traceql.Attribute, common.TagValuesCallbackV2, common.SearchOptions) error { +func (a *walBlock) SearchTagValuesV2(context.Context, traceql.Attribute, common.TagValuesCallbackV2, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } @@ -305,13 +305,13 @@ func (a *walBlock) Fetch(context.Context, traceql.FetchSpansRequest, common.Sear return traceql.FetchSpansResponse{}, common.ErrUnsupported } -// FetchTagValues implements traceql.Searcher -func (a *walBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.SearchOptions) error { +// FetchTagValues implements common.Searcher +func (a *walBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } -// FetchTagNames implements traceql.Searcher -func (a *walBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.SearchOptions) error { +// FetchTagNames implements common.Searcher +func (a *walBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } diff --git a/tempodb/encoding/vparquet2/block.go b/tempodb/encoding/vparquet2/block.go index 7f90d78d1e0..93de508dfa3 100644 --- a/tempodb/encoding/vparquet2/block.go +++ b/tempodb/encoding/vparquet2/block.go @@ -37,10 +37,10 @@ func (b *backendBlock) BlockMeta() *backend.BlockMeta { return 
b.meta } -func (b *backendBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.SearchOptions) error { +func (b *backendBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } -func (b *backendBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.SearchOptions) error { +func (b *backendBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } diff --git a/tempodb/encoding/vparquet2/block_search_tags.go b/tempodb/encoding/vparquet2/block_search_tags.go index 8c8480f6f86..c6d067399ab 100644 --- a/tempodb/encoding/vparquet2/block_search_tags.go +++ b/tempodb/encoding/vparquet2/block_search_tags.go @@ -43,7 +43,7 @@ var nonTraceQLAttributes = map[string]string{ LabelRootSpanName: columnPathRootSpanName, } -func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, opts common.SearchOptions) error { +func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { derivedCtx, span := tracer.Start(ctx, "parquet.backendBlock.SearchTags", trace.WithAttributes( attribute.String("blockID", b.meta.BlockID.String()), @@ -56,7 +56,10 @@ func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeSc if err != nil { return fmt.Errorf("unexpected error opening parquet file: %w", err) } - defer func() { span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) }() + defer func() { + mcb(rr.BytesRead()) // record bytes read + span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) + }() return searchTags(derivedCtx, scope, cb, pf) 
} @@ -181,7 +184,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC return nil } -func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, opts common.SearchOptions) error { +func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { att, ok := translateTagToAttribute[tag] if !ok { att = traceql.NewAttribute(tag) @@ -193,10 +196,10 @@ func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb commo return false } - return b.SearchTagValuesV2(ctx, att, cb2, opts) + return b.SearchTagValuesV2(ctx, att, cb2, mcb, opts) } -func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, opts common.SearchOptions) error { +func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, mcb common.MetricsCallback, opts common.SearchOptions) error { derivedCtx, span := tracer.Start(ctx, "parquet.backendBlock.SearchTagValuesV2", trace.WithAttributes( attribute.String("blockID", b.meta.BlockID.String()), @@ -209,7 +212,10 @@ func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attrib if err != nil { return fmt.Errorf("unexpected error opening parquet file: %w", err) } - defer func() { span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) }() + defer func() { + mcb(rr.BytesRead()) // record bytes read + span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) + }() return searchTagValues(derivedCtx, tag, cb, pf) } diff --git a/tempodb/encoding/vparquet2/block_search_tags_test.go b/tempodb/encoding/vparquet2/block_search_tags_test.go index d3905d67530..a32e1bcb0f5 100644 --- a/tempodb/encoding/vparquet2/block_search_tags_test.go +++ b/tempodb/encoding/vparquet2/block_search_tags_test.go @@ -24,10 +24,13 @@ func 
TestBackendBlockSearchTags(t *testing.T) { cb := func(s string, _ traceql.AttributeScope) { foundAttrs[s] = struct{}{} } + mc := collector.NewMetricsCollector() ctx := context.Background() - err := block.SearchTags(ctx, scope, cb, common.DefaultSearchOptions()) + err := block.SearchTags(ctx, scope, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(t, err) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) // test that all attrs are in found attrs for k := range attrs { @@ -71,10 +74,13 @@ func TestBackendBlockSearchTagValues(t *testing.T) { assert.Equal(t, val, s, tag) return true } + mc := collector.NewMetricsCollector() - err := block.SearchTagValues(ctx, tag, cb, common.DefaultSearchOptions()) + err := block.SearchTagValues(ctx, tag, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(t, err) require.True(t, wasCalled, tag) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) } } @@ -145,10 +151,13 @@ func TestBackendBlockSearchTagValuesV2(t *testing.T) { got = append(got, v) return false } + mc := collector.NewMetricsCollector() - err := block.SearchTagValuesV2(ctx, tc.tag, cb, common.DefaultSearchOptions()) + err := block.SearchTagValuesV2(ctx, tc.tag, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(t, err, tc.tag) require.Equal(t, tc.vals, got, "tag=%v", tc.tag) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) } } @@ -169,11 +178,12 @@ func BenchmarkBackendBlockSearchTags(b *testing.B) { block := newBackendBlock(meta, rr) opts := common.DefaultSearchOptions() d := collector.NewDistinctString(1_000_000) + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ { - err := block.SearchTags(ctx, traceql.AttributeScopeNone, func(s string, _ traceql.AttributeScope) { d.Collect(s) }, opts) + err := block.SearchTags(ctx, traceql.AttributeScopeNone, func(s string, _ 
traceql.AttributeScope) { d.Collect(s) }, mc.Add, opts) require.NoError(b, err) } } @@ -203,9 +213,10 @@ func BenchmarkBackendBlockSearchTagValues(b *testing.B) { for _, tc := range testCases { b.Run(tc, func(b *testing.B) { d := collector.NewDistinctString(1_000_000) + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ { - err := block.SearchTagValues(ctx, tc, d.Collect, opts) + err := block.SearchTagValues(ctx, tc, d.Collect, mc.Add, opts) require.NoError(b, err) } }) diff --git a/tempodb/encoding/vparquet2/compactor.go b/tempodb/encoding/vparquet2/compactor.go index 88e5a70686e..ccf1e2f87f8 100644 --- a/tempodb/encoding/vparquet2/compactor.go +++ b/tempodb/encoding/vparquet2/compactor.go @@ -251,7 +251,22 @@ func (c *Compactor) finishBlock(ctx context.Context, block *streamingBlock, l lo return fmt.Errorf("error completing block: %w", err) } - level.Info(l).Log("msg", "wrote compacted block", "meta", fmt.Sprintf("%+v", block.meta)) + level.Info(l).Log("msg", "wrote compacted block", + "version", block.meta.Version, + "tenantID", block.meta.TenantID, + "blockID", block.meta.BlockID.String(), + "startTime", block.meta.StartTime.String(), + "endTime", block.meta.EndTime.String(), + "totalObjects", block.meta.TotalObjects, + "size", block.meta.Size_, + "compactionLevel", block.meta.CompactionLevel, + "encoding", block.meta.Encoding.String(), + "totalRecords", block.meta.TotalObjects, + "bloomShardCount", block.meta.BloomShardCount, + "footerSize", block.meta.FooterSize, + "replicationFactor", block.meta.ReplicationFactor, + ) + compactionLevel := int(block.meta.CompactionLevel) - 1 if c.opts.BytesWritten != nil { c.opts.BytesWritten(compactionLevel, bytesFlushed) diff --git a/tempodb/encoding/vparquet2/wal_block.go b/tempodb/encoding/vparquet2/wal_block.go index 05ecb7c121f..583e312f13b 100644 --- a/tempodb/encoding/vparquet2/wal_block.go +++ b/tempodb/encoding/vparquet2/wal_block.go @@ -573,7 +573,7 @@ func (b *walBlock) Search(ctx 
context.Context, req *tempopb.SearchRequest, _ com return results, nil } -func (b *walBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, _ common.SearchOptions) error { +func (b *walBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, mcb common.MetricsCallback, _ common.SearchOptions) error { for i, blockFlush := range b.readFlushes() { file, err := blockFlush.file(ctx) if err != nil { @@ -587,12 +587,13 @@ func (b *walBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, if err != nil { return fmt.Errorf("error searching block [%s %d]: %w", b.meta.BlockID.String(), i, err) } + mcb(file.r.BytesRead()) // record bytes read } return nil } -func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, opts common.SearchOptions) error { +func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { att, ok := translateTagToAttribute[tag] if !ok { att = traceql.NewAttribute(tag) @@ -604,10 +605,10 @@ func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.Ta return false } - return b.SearchTagValuesV2(ctx, att, cb2, opts) + return b.SearchTagValuesV2(ctx, att, cb2, mcb, opts) } -func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, _ common.SearchOptions) error { +func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, mcb common.MetricsCallback, _ common.SearchOptions) error { for i, blockFlush := range b.readFlushes() { file, err := blockFlush.file(ctx) if err != nil { @@ -621,6 +622,7 @@ func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, if err != nil { return fmt.Errorf("error searching block [%s %d]: %w", b.meta.BlockID.String(), i, err) } + mcb(file.r.BytesRead()) // record 
bytes read } return nil @@ -671,11 +673,11 @@ func (b *walBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, opt }, nil } -func (b *walBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.SearchOptions) error { +func (b *walBlock) FetchTagValues(context.Context, traceql.FetchTagValuesRequest, traceql.FetchTagValuesCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } -func (b *walBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.SearchOptions) error { +func (b *walBlock) FetchTagNames(context.Context, traceql.FetchTagsRequest, traceql.FetchTagsCallback, common.MetricsCallback, common.SearchOptions) error { return common.ErrUnsupported } diff --git a/tempodb/encoding/vparquet2/wal_block_test.go b/tempodb/encoding/vparquet2/wal_block_test.go index 0dc667804cb..d7ad999c675 100644 --- a/tempodb/encoding/vparquet2/wal_block_test.go +++ b/tempodb/encoding/vparquet2/wal_block_test.go @@ -9,6 +9,7 @@ import ( "github.com/gogo/protobuf/proto" "github.com/google/uuid" + "github.com/grafana/tempo/pkg/collector" "github.com/grafana/tempo/pkg/model" "github.com/grafana/tempo/pkg/model/trace" "github.com/grafana/tempo/pkg/tempopb" @@ -372,11 +373,12 @@ func BenchmarkWalSearchTagValues(b *testing.B) { cb := func(_ string) bool { return true } + mc := collector.NewMetricsCollector() for _, t := range tags { b.Run(t, func(b *testing.B) { for i := 0; i < b.N; i++ { - err := w.SearchTagValues(context.TODO(), t, cb, common.DefaultSearchOptions()) + err := w.SearchTagValues(context.TODO(), t, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(b, err) } }) diff --git a/tempodb/encoding/vparquet3/block_autocomplete.go b/tempodb/encoding/vparquet3/block_autocomplete.go index 89421a3182f..cbdc8427516 100644 --- a/tempodb/encoding/vparquet3/block_autocomplete.go +++ b/tempodb/encoding/vparquet3/block_autocomplete.go @@ -36,7 
+36,7 @@ func (r tagRequest) keysRequested(scope traceql.AttributeScope) bool { return r.scope == scope } -func (b *backendBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, opts common.SearchOptions) error { +func (b *backendBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { err := checkConditions(req.Conditions) if err != nil { return errors.Wrap(err, "conditions invalid") @@ -51,13 +51,15 @@ func (b *backendBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsR if len(req.Conditions) < 1 || mingledConditions { return b.SearchTags(ctx, req.Scope, func(t string, scope traceql.AttributeScope) { cb(t, scope) - }, opts) + }, mcb, opts) } - pf, _, err := b.openForSearch(ctx, opts) + pf, rr, err := b.openForSearch(ctx, opts) if err != nil { return err } + // report metrics from a deferred closure so BytesRead is sampled at return, not at defer time (covers early exits) + defer func() { mcb(rr.BytesRead()) }() tr := tagRequest{ conditions: req.Conditions, @@ -147,7 +149,7 @@ func tagNamesForSpecialColumns(scope traceql.AttributeScope, pf *parquet.File, d } } -func (b *backendBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, opts common.SearchOptions) error { +func (b *backendBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { err := checkConditions(req.Conditions) if err != nil { return errors.Wrap(err, "conditions invalid") @@ -160,13 +162,15 @@ func (b *backendBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagV // Last check. No conditions, use old path. It's much faster.
if len(req.Conditions) <= 1 || mingledConditions { // <= 1 because we always have a "OpNone" condition for the tag name - return b.SearchTagValuesV2(ctx, req.TagName, common.TagValuesCallbackV2(cb), common.DefaultSearchOptions()) + return b.SearchTagValuesV2(ctx, req.TagName, common.TagValuesCallbackV2(cb), mcb, common.DefaultSearchOptions()) } - pf, _, err := b.openForSearch(ctx, opts) + pf, rr, err := b.openForSearch(ctx, opts) if err != nil { return err } + // report metrics from a deferred closure so BytesRead is sampled at return, not at defer time (covers early exits) + defer func() { mcb(rr.BytesRead()) }() tr := tagRequest{ conditions: req.Conditions, diff --git a/tempodb/encoding/vparquet3/block_autocomplete_test.go b/tempodb/encoding/vparquet3/block_autocomplete_test.go index 5b6ef06e30f..b4964c45f11 100644 --- a/tempodb/encoding/vparquet3/block_autocomplete_test.go +++ b/tempodb/encoding/vparquet3/block_autocomplete_test.go @@ -220,12 +220,15 @@ func TestFetchTagNames(t *testing.T) { Conditions: req.Conditions, Scope: scope, } + mc := collector.NewMetricsCollector() err = block.FetchTagNames(ctx, autocompleteReq, func(t string, scope traceql.AttributeScope) bool { distinctAttrNames.Collect(scope.String(), t) return false - }, opts) + }, mc.Add, opts) require.NoError(t, err) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) actualValues := distinctAttrNames.Strings() @@ -511,9 +514,11 @@ func TestFetchTagValues(t *testing.T) { Attribute: tagAtrr, Op: traceql.OpNone, }) - - err = block.FetchTagValues(ctx, autocompleteReq, traceql.MakeCollectTagValueFunc(distinctValues.Collect), opts) + mc := collector.NewMetricsCollector() + err = block.FetchTagValues(ctx, autocompleteReq, traceql.MakeCollectTagValueFunc(distinctValues.Collect), mc.Add, opts) require.NoError(t, err) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) expectedValues := tc.expectedValues actualValues := distinctValues.Values() @@ -608,10 +613,11 @@ func
BenchmarkFetchTagValues(b *testing.B) { Conditions: req.Conditions, TagName: tag, } + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ { - err := block.FetchTagValues(ctx, autocompleteReq, traceql.MakeCollectTagValueFunc(distinctValues.Collect), opts) + err := block.FetchTagValues(ctx, autocompleteReq, traceql.MakeCollectTagValueFunc(distinctValues.Collect), mc.Add, opts) require.NoError(b, err) } }) @@ -680,13 +686,14 @@ func BenchmarkFetchTags(b *testing.B) { Conditions: req.Conditions, Scope: scope, } + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ { err := block.FetchTagNames(ctx, autocompleteReq, func(t string, scope traceql.AttributeScope) bool { distinctStrings.Collect(scope.String(), t) return false - }, opts) + }, mc.Add, opts) require.NoError(b, err) } }) diff --git a/tempodb/encoding/vparquet3/block_search_tags.go b/tempodb/encoding/vparquet3/block_search_tags.go index 877aa07ebf2..87e742557a3 100644 --- a/tempodb/encoding/vparquet3/block_search_tags.go +++ b/tempodb/encoding/vparquet3/block_search_tags.go @@ -44,7 +44,7 @@ var nonTraceQLAttributes = map[string]string{ LabelRootSpanName: columnPathRootSpanName, } -func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, opts common.SearchOptions) error { +func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { derivedCtx, span := tracer.Start(ctx, "parquet.backendBlock.SearchTags", trace.WithAttributes( attribute.String("blockID", b.meta.BlockID.String()), @@ -57,7 +57,10 @@ func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeSc if err != nil { return fmt.Errorf("unexpected error opening parquet file: %w", err) } - defer func() { span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) }() + defer func() { + mcb(rr.BytesRead()) // 
capture bytes read + span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) + }() return searchTags(derivedCtx, scope, cb, pf, b.meta.DedicatedColumns) } @@ -190,7 +193,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC return nil } -func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, opts common.SearchOptions) error { +func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { att, ok := translateTagToAttribute[tag] if !ok { att = traceql.NewAttribute(tag) @@ -202,10 +205,10 @@ func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb commo return false } - return b.SearchTagValuesV2(ctx, att, cb2, opts) + return b.SearchTagValuesV2(ctx, att, cb2, mcb, opts) } -func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, opts common.SearchOptions) error { +func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, mcb common.MetricsCallback, opts common.SearchOptions) error { derivedCtx, span := tracer.Start(ctx, "parquet.backendBlock.SearchTagValuesV2", trace.WithAttributes( attribute.String("blockID", b.meta.BlockID.String()), @@ -218,7 +221,10 @@ func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attrib if err != nil { return fmt.Errorf("unexpected error opening parquet file: %w", err) } - defer func() { span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) }() + defer func() { + mcb(rr.BytesRead()) // capture bytes read + span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) + }() return searchTagValues(derivedCtx, tag, cb, pf, b.meta.DedicatedColumns) } diff --git a/tempodb/encoding/vparquet3/block_search_tags_test.go 
b/tempodb/encoding/vparquet3/block_search_tags_test.go index b8ac79db3e0..31e1ae2a704 100644 --- a/tempodb/encoding/vparquet3/block_search_tags_test.go +++ b/tempodb/encoding/vparquet3/block_search_tags_test.go @@ -24,10 +24,13 @@ func TestBackendBlockSearchTags(t *testing.T) { cb := func(s string, _ traceql.AttributeScope) { foundAttrs[s] = struct{}{} } + mc := collector.NewMetricsCollector() ctx := context.Background() - err := block.SearchTags(ctx, scope, cb, common.DefaultSearchOptions()) + err := block.SearchTags(ctx, scope, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(t, err) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) // test that all attrs are in found attrs for k := range attrs { @@ -71,10 +74,13 @@ func TestBackendBlockSearchTagValues(t *testing.T) { assert.Equal(t, val, s, tag) return true } + mc := collector.NewMetricsCollector() - err := block.SearchTagValues(ctx, tag, cb, common.DefaultSearchOptions()) + err := block.SearchTagValues(ctx, tag, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(t, err) require.True(t, wasCalled, tag) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) } } @@ -172,9 +178,12 @@ func TestBackendBlockSearchTagValuesV2(t *testing.T) { return false } - err := block.SearchTagValuesV2(ctx, tc.tag, cb, common.DefaultSearchOptions()) + mc := collector.NewMetricsCollector() + err := block.SearchTagValuesV2(ctx, tc.tag, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(t, err, tc.tag) require.Equal(t, tc.vals, got, "tag=%v", tc.tag) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) } } @@ -195,11 +204,12 @@ func BenchmarkBackendBlockSearchTags(b *testing.B) { block := newBackendBlock(meta, rr) opts := common.DefaultSearchOptions() d := collector.NewDistinctString(1_000_000) + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ 
{ - err := block.SearchTags(ctx, traceql.AttributeScopeNone, func(s string, _ traceql.AttributeScope) { d.Collect(s) }, opts) + err := block.SearchTags(ctx, traceql.AttributeScopeNone, func(s string, _ traceql.AttributeScope) { d.Collect(s) }, mc.Add, opts) require.NoError(b, err) } } @@ -229,9 +239,11 @@ func BenchmarkBackendBlockSearchTagValues(b *testing.B) { for _, tc := range testCases { b.Run(tc, func(b *testing.B) { d := collector.NewDistinctString(1_000_000) + mc := collector.NewMetricsCollector() + b.ResetTimer() for i := 0; i < b.N; i++ { - err := block.SearchTagValues(ctx, tc, d.Collect, opts) + err := block.SearchTagValues(ctx, tc, d.Collect, mc.Add, opts) require.NoError(b, err) } }) diff --git a/tempodb/encoding/vparquet3/compactor.go b/tempodb/encoding/vparquet3/compactor.go index 19164617529..7d66cadcf43 100644 --- a/tempodb/encoding/vparquet3/compactor.go +++ b/tempodb/encoding/vparquet3/compactor.go @@ -259,7 +259,22 @@ func (c *Compactor) finishBlock(ctx context.Context, block *streamingBlock, l lo return fmt.Errorf("error completing block: %w", err) } - level.Info(l).Log("msg", "wrote compacted block", "meta", fmt.Sprintf("%+v", block.meta)) + level.Info(l).Log("msg", "wrote compacted block", + "version", block.meta.Version, + "tenantID", block.meta.TenantID, + "blockID", block.meta.BlockID.String(), + "startTime", block.meta.StartTime.String(), + "endTime", block.meta.EndTime.String(), + "totalObjects", block.meta.TotalObjects, + "size", block.meta.Size_, + "compactionLevel", block.meta.CompactionLevel, + "encoding", block.meta.Encoding.String(), + "totalRecords", block.meta.TotalObjects, + "bloomShardCount", block.meta.BloomShardCount, + "footerSize", block.meta.FooterSize, + "replicationFactor", block.meta.ReplicationFactor, + ) + compactionLevel := int(block.meta.CompactionLevel) - 1 if c.opts.BytesWritten != nil { c.opts.BytesWritten(compactionLevel, bytesFlushed) diff --git a/tempodb/encoding/vparquet3/wal_block.go 
b/tempodb/encoding/vparquet3/wal_block.go index a37add6e10d..d58d1ff6c52 100644 --- a/tempodb/encoding/vparquet3/wal_block.go +++ b/tempodb/encoding/vparquet3/wal_block.go @@ -587,7 +587,7 @@ func (b *walBlock) Search(ctx context.Context, req *tempopb.SearchRequest, _ com return results, nil } -func (b *walBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, _ common.SearchOptions) error { +func (b *walBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, mcb common.MetricsCallback, _ common.SearchOptions) error { for i, blockFlush := range b.readFlushes() { file, err := blockFlush.file(ctx) if err != nil { @@ -601,12 +601,13 @@ func (b *walBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, if err != nil { return fmt.Errorf("error searching block [%s %d]: %w", b.meta.BlockID.String(), i, err) } + mcb(file.r.BytesRead()) // record bytes read } return nil } -func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, opts common.SearchOptions) error { +func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { att, ok := translateTagToAttribute[tag] if !ok { att = traceql.NewAttribute(tag) @@ -618,10 +619,10 @@ func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.Ta return false } - return b.SearchTagValuesV2(ctx, att, cb2, opts) + return b.SearchTagValuesV2(ctx, att, cb2, mcb, opts) } -func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, _ common.SearchOptions) error { +func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, mcb common.MetricsCallback, _ common.SearchOptions) error { for i, blockFlush := range b.readFlushes() { file, err := blockFlush.file(ctx) if err != nil { @@ -635,6 +636,7 @@ 
func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, if err != nil { return fmt.Errorf("error searching block [%s %d]: %w", b.meta.BlockID.String(), i, err) } + mcb(file.r.BytesRead()) // record bytes read } return nil @@ -685,7 +687,7 @@ func (b *walBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, _ c }, nil } -func (b *walBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, opts common.SearchOptions) error { +func (b *walBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { err := checkConditions(req.Conditions) if err != nil { return fmt.Errorf("conditions invalid: %w", err) @@ -697,7 +699,7 @@ func (b *walBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValue } if len(req.Conditions) <= 1 || mingledConditions { // Last check. No conditions, use old path. It's much faster. - return b.SearchTagValuesV2(ctx, req.TagName, common.TagValuesCallbackV2(cb), common.DefaultSearchOptions()) + return b.SearchTagValuesV2(ctx, req.TagName, common.TagValuesCallbackV2(cb), mcb, common.DefaultSearchOptions()) } blockFlushes := b.readFlushes() @@ -733,18 +735,20 @@ func (b *walBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValue v := oe.Value.(traceql.Static) if cb(v) { iter.Close() - return nil // We have enough values + mcb(file.r.BytesRead()) // record bytes read + return nil // We have enough values } } } iter.Close() + mcb(file.r.BytesRead()) // record bytes read } // combine iters? 
return nil } -func (b *walBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, opts common.SearchOptions) error { +func (b *walBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { err := checkConditions(req.Conditions) if err != nil { return fmt.Errorf("conditions invalid: %w", err) @@ -758,7 +762,7 @@ func (b *walBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsReque if len(req.Conditions) < 1 || mingledConditions { return b.SearchTags(ctx, req.Scope, func(t string, scope traceql.AttributeScope) { cb(t, scope) - }, opts) + }, mcb, opts) } blockFlushes := b.readFlushes() @@ -792,11 +796,13 @@ func (b *walBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsReque for _, oe := range res.OtherEntries { if cb(oe.Key, oe.Value.(traceql.AttributeScope)) { iter.Close() - return nil // We have enough values + mcb(file.r.BytesRead()) // record bytes read + return nil // We have enough values } } } iter.Close() + mcb(file.r.BytesRead()) // record bytes read // add well known tagNamesForSpecialColumns(req.Scope, file.parquetFile, b.meta.DedicatedColumns, cb) diff --git a/tempodb/encoding/vparquet3/wal_block_test.go b/tempodb/encoding/vparquet3/wal_block_test.go index dea3d9c242f..eeafc452c07 100644 --- a/tempodb/encoding/vparquet3/wal_block_test.go +++ b/tempodb/encoding/vparquet3/wal_block_test.go @@ -9,6 +9,7 @@ import ( "github.com/gogo/protobuf/proto" "github.com/google/uuid" + "github.com/grafana/tempo/pkg/collector" "github.com/grafana/tempo/pkg/model" "github.com/grafana/tempo/pkg/model/trace" "github.com/grafana/tempo/pkg/tempopb" @@ -372,11 +373,12 @@ func BenchmarkWalSearchTagValues(b *testing.B) { cb := func(_ string) bool { return true } + mc := collector.NewMetricsCollector() for _, t := range tags { b.Run(t, func(b *testing.B) { for i := 0; i < b.N; i++ { - err := 
w.SearchTagValues(context.TODO(), t, cb, common.DefaultSearchOptions()) + err := w.SearchTagValues(context.TODO(), t, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(b, err) } }) diff --git a/tempodb/encoding/vparquet4/block_autocomplete.go b/tempodb/encoding/vparquet4/block_autocomplete.go index c91922e9817..6dbda2551db 100644 --- a/tempodb/encoding/vparquet4/block_autocomplete.go +++ b/tempodb/encoding/vparquet4/block_autocomplete.go @@ -36,7 +36,7 @@ func (r tagRequest) keysRequested(scope traceql.AttributeScope) bool { return r.scope == scope } -func (b *backendBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, opts common.SearchOptions) error { +func (b *backendBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { err := checkConditions(req.Conditions) if err != nil { return errors.Wrap(err, "conditions invalid") @@ -51,14 +51,17 @@ func (b *backendBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsR if len(req.Conditions) < 1 || mingledConditions { return b.SearchTags(ctx, req.Scope, func(t string, scope traceql.AttributeScope) { cb(t, scope) - }, opts) + }, mcb, opts) } - pf, _, err := b.openForSearch(ctx, opts) + pf, rr, err := b.openForSearch(ctx, opts) if err != nil { return err } + // report metrics from a deferred closure so BytesRead is sampled at return, not at defer time (covers early exits) + defer func() { mcb(rr.BytesRead()) }() + tr := tagRequest{ conditions: req.Conditions, scope: req.Scope, @@ -87,7 +90,6 @@ func (b *backendBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsR } tagNamesForSpecialColumns(req.Scope, pf, b.meta.DedicatedColumns, cb) - return nil } @@ -147,7 +149,7 @@ func tagNamesForSpecialColumns(scope traceql.AttributeScope, pf *parquet.File, d } } -func (b *backendBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, opts common.SearchOptions) error {
+func (b *backendBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { err := checkConditions(req.Conditions) if err != nil { return errors.Wrap(err, "conditions invalid") @@ -160,13 +162,15 @@ func (b *backendBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagV // Last check. No conditions, use old path. It's much faster. if len(req.Conditions) <= 1 || mingledConditions { // <= 1 because we always have a "OpNone" condition for the tag name - return b.SearchTagValuesV2(ctx, req.TagName, common.TagValuesCallbackV2(cb), common.DefaultSearchOptions()) + return b.SearchTagValuesV2(ctx, req.TagName, common.TagValuesCallbackV2(cb), mcb, common.DefaultSearchOptions()) } - pf, _, err := b.openForSearch(ctx, opts) + pf, rr, err := b.openForSearch(ctx, opts) if err != nil { return err } + // report metrics from a deferred closure so BytesRead is sampled at return, not at defer time (covers early exits) + defer func() { mcb(rr.BytesRead()) }() tr := tagRequest{ conditions: req.Conditions, diff --git a/tempodb/encoding/vparquet4/block_autocomplete_test.go b/tempodb/encoding/vparquet4/block_autocomplete_test.go index 3d745eda2b4..d0f87aa82af 100644 --- a/tempodb/encoding/vparquet4/block_autocomplete_test.go +++ b/tempodb/encoding/vparquet4/block_autocomplete_test.go @@ -327,12 +327,15 @@ func TestFetchTagNames(t *testing.T) { Conditions: req.Conditions, Scope: scope, } + mc := collector.NewMetricsCollector() err = block.FetchTagNames(ctx, autocompleteReq, func(t string, scope traceql.AttributeScope) bool { distinctAttrNames.Collect(scope.String(), t) return false - }, opts) + }, mc.Add, opts) require.NoError(t, err) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) actualValues := distinctAttrNames.Strings() @@ -634,9 +637,12 @@ func TestFetchTagValues(t *testing.T) { Attribute: tagAtrr, Op: traceql.OpNone, }) + mc := collector.NewMetricsCollector() - err = 
block.FetchTagValues(ctx, autocompleteReq, traceql.MakeCollectTagValueFunc(distinctValues.Collect), opts) + err = block.FetchTagValues(ctx, autocompleteReq, traceql.MakeCollectTagValueFunc(distinctValues.Collect), mc.Add, opts) require.NoError(t, err) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) expectedValues := tc.expectedValues actualValues := distinctValues.Values() @@ -731,10 +737,11 @@ func BenchmarkFetchTagValues(b *testing.B) { Conditions: req.Conditions, TagName: tag, } + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ { - err := block.FetchTagValues(ctx, autocompleteReq, traceql.MakeCollectTagValueFunc(distinctValues.Collect), opts) + err := block.FetchTagValues(ctx, autocompleteReq, traceql.MakeCollectTagValueFunc(distinctValues.Collect), mc.Add, opts) require.NoError(b, err) } }) @@ -803,13 +810,14 @@ func BenchmarkFetchTags(b *testing.B) { Conditions: req.Conditions, Scope: scope, } + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ { err := block.FetchTagNames(ctx, autocompleteReq, func(t string, scope traceql.AttributeScope) bool { distinctStrings.Collect(scope.String(), t) return false - }, opts) + }, mc.Add, opts) require.NoError(b, err) } }) diff --git a/tempodb/encoding/vparquet4/block_search_tags.go b/tempodb/encoding/vparquet4/block_search_tags.go index 7913d01406c..78ff2a9f076 100644 --- a/tempodb/encoding/vparquet4/block_search_tags.go +++ b/tempodb/encoding/vparquet4/block_search_tags.go @@ -44,7 +44,7 @@ var nonTraceQLAttributes = map[string]string{ LabelRootSpanName: columnPathRootSpanName, } -func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, opts common.SearchOptions) error { +func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { derivedCtx, span := 
tracer.Start(ctx, "parquet.backendBlock.SearchTags", trace.WithAttributes( attribute.String("blockID", b.meta.BlockID.String()), @@ -57,11 +57,15 @@ func (b *backendBlock) SearchTags(ctx context.Context, scope traceql.AttributeSc if err != nil { return fmt.Errorf("unexpected error opening parquet file: %w", err) } - defer func() { span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) }() + defer func() { + mcb(rr.BytesRead()) // report bytes read + span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) + }() return searchTags(derivedCtx, scope, cb, pf, b.meta.DedicatedColumns) } +// modify cb signature to also take in the func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsCallback, pf *parquet.File, dc backend.DedicatedColumns) error { scanColumns := func(standardKeyPath string, specialMappings map[string]string, columnMapping dedicatedColumnMapping, cb common.TagsCallback, scope traceql.AttributeScope) error { specialAttrIdxs := map[int]string{} @@ -211,7 +215,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC return nil } -func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, opts common.SearchOptions) error { +func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { att, ok := translateTagToAttribute[tag] if !ok { att = traceql.NewAttribute(tag) @@ -223,10 +227,10 @@ func (b *backendBlock) SearchTagValues(ctx context.Context, tag string, cb commo return false } - return b.SearchTagValuesV2(ctx, att, cb2, opts) + return b.SearchTagValuesV2(ctx, att, cb2, mcb, opts) } -func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, opts common.SearchOptions) error { +func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb 
common.TagValuesCallbackV2, mcb common.MetricsCallback, opts common.SearchOptions) error { derivedCtx, span := tracer.Start(ctx, "parquet.backendBlock.SearchTagValuesV2", trace.WithAttributes( attribute.String("blockID", b.meta.BlockID.String()), @@ -239,8 +243,11 @@ func (b *backendBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attrib if err != nil { return fmt.Errorf("unexpected error opening parquet file: %w", err) } - // TODO(suraj): push this BytesRead to SLO middleware - defer func() { span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) }() + // capture bytes read into metrics callback and span + defer func() { + mcb(rr.BytesRead()) // report bytes read + span.SetAttributes(attribute.Int64("inspectedBytes", int64(rr.BytesRead()))) + }() return searchTagValues(derivedCtx, tag, cb, pf, b.meta.DedicatedColumns) } diff --git a/tempodb/encoding/vparquet4/block_search_tags_test.go b/tempodb/encoding/vparquet4/block_search_tags_test.go index 0ca5a0dcc94..2c734da42b1 100644 --- a/tempodb/encoding/vparquet4/block_search_tags_test.go +++ b/tempodb/encoding/vparquet4/block_search_tags_test.go @@ -24,10 +24,13 @@ func TestBackendBlockSearchTags(t *testing.T) { cb := func(s string, _ traceql.AttributeScope) { foundAttrs[s] = struct{}{} } + mc := collector.NewMetricsCollector() ctx := context.Background() - err := block.SearchTags(ctx, scope, cb, common.DefaultSearchOptions()) + err := block.SearchTags(ctx, scope, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(t, err) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) // test that all attrs are in found attrs for k := range attrs { @@ -71,10 +74,13 @@ func TestBackendBlockSearchTagValues(t *testing.T) { assert.Equal(t, val, s, tag) return true } + mc := collector.NewMetricsCollector() - err := block.SearchTagValues(ctx, tag, cb, common.DefaultSearchOptions()) + err := block.SearchTagValues(ctx, tag, cb, mc.Add, 
common.DefaultSearchOptions()) require.NoError(t, err) require.True(t, wasCalled, tag) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) } } @@ -171,10 +177,13 @@ func TestBackendBlockSearchTagValuesV2(t *testing.T) { got = append(got, v) return false } + mc := collector.NewMetricsCollector() - err := block.SearchTagValuesV2(ctx, tc.tag, cb, common.DefaultSearchOptions()) + err := block.SearchTagValuesV2(ctx, tc.tag, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(t, err, tc.tag) require.Equal(t, tc.vals, got, "tag=%v", tc.tag) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) } } @@ -195,11 +204,12 @@ func BenchmarkBackendBlockSearchTags(b *testing.B) { block := newBackendBlock(meta, rr) opts := common.DefaultSearchOptions() d := collector.NewDistinctString(1_000_000) + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ { - err := block.SearchTags(ctx, traceql.AttributeScopeNone, func(s string, _ traceql.AttributeScope) { d.Collect(s) }, opts) + err := block.SearchTags(ctx, traceql.AttributeScopeNone, func(s string, _ traceql.AttributeScope) { d.Collect(s) }, mc.Add, opts) require.NoError(b, err) } } @@ -229,9 +239,10 @@ func BenchmarkBackendBlockSearchTagValues(b *testing.B) { for _, tc := range testCases { b.Run(tc, func(b *testing.B) { d := collector.NewDistinctString(1_000_000) + mc := collector.NewMetricsCollector() b.ResetTimer() for i := 0; i < b.N; i++ { - err := block.SearchTagValues(ctx, tc, d.Collect, opts) + err := block.SearchTagValues(ctx, tc, d.Collect, mc.Add, opts) require.NoError(b, err) } }) diff --git a/tempodb/encoding/vparquet4/block_traceql_test.go b/tempodb/encoding/vparquet4/block_traceql_test.go index f4a3a1a3bea..8c81e0d217a 100644 --- a/tempodb/encoding/vparquet4/block_traceql_test.go +++ b/tempodb/encoding/vparquet4/block_traceql_test.go @@ -715,7 +715,7 @@ func TestBackendBlockSelectAll(t *testing.T) 
{ b := makeBackendBlockWithTraces(t, traces) - _, _, _, req, err := traceql.NewEngine().Compile("{}") + _, _, _, req, err := traceql.Compile("{}") require.NoError(t, err) req.SecondPass = func(inSS *traceql.Spanset) ([]*traceql.Spanset, error) { return []*traceql.Spanset{inSS}, nil } req.SecondPassSelectAll = true diff --git a/tempodb/encoding/vparquet4/compactor.go b/tempodb/encoding/vparquet4/compactor.go index 8cf64d08658..cb3532b5540 100644 --- a/tempodb/encoding/vparquet4/compactor.go +++ b/tempodb/encoding/vparquet4/compactor.go @@ -260,7 +260,21 @@ func (c *Compactor) finishBlock(ctx context.Context, block *streamingBlock, l lo return fmt.Errorf("error completing block: %w", err) } - level.Info(l).Log("msg", "wrote compacted block", "meta", fmt.Sprintf("%+v", block.meta)) + level.Info(l).Log("msg", "wrote compacted block", + "version", block.meta.Version, + "tenantID", block.meta.TenantID, + "blockID", block.meta.BlockID.String(), + "startTime", block.meta.StartTime.String(), + "endTime", block.meta.EndTime.String(), + "totalObjects", block.meta.TotalObjects, + "size", block.meta.Size_, + "compactionLevel", block.meta.CompactionLevel, + "encoding", block.meta.Encoding.String(), + "totalRecords", block.meta.TotalObjects, + "bloomShardCount", block.meta.BloomShardCount, + "footerSize", block.meta.FooterSize, + "replicationFactor", block.meta.ReplicationFactor, + ) span.AddEvent("wrote compacted block") span.SetAttributes( diff --git a/tempodb/encoding/vparquet4/wal_block.go b/tempodb/encoding/vparquet4/wal_block.go index 17dfaa2bddd..dcb476bd856 100644 --- a/tempodb/encoding/vparquet4/wal_block.go +++ b/tempodb/encoding/vparquet4/wal_block.go @@ -594,7 +594,7 @@ func (b *walBlock) Search(ctx context.Context, req *tempopb.SearchRequest, _ com return results, nil } -func (b *walBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, cb common.TagsCallback, _ common.SearchOptions) error { +func (b *walBlock) SearchTags(ctx context.Context, scope 
traceql.AttributeScope, cb common.TagsCallback, mcb common.MetricsCallback, _ common.SearchOptions) error { ctx, span := tracer.Start(ctx, "walBlock.SearchTags") defer span.End() @@ -611,12 +611,13 @@ func (b *walBlock) SearchTags(ctx context.Context, scope traceql.AttributeScope, if err != nil { return fmt.Errorf("error searching block [%s %d]: %w", b.meta.BlockID.String(), i, err) } + mcb(file.r.BytesRead()) // record bytes read } return nil } -func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, opts common.SearchOptions) error { +func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.TagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { ctx, span := tracer.Start(ctx, "walBlock.SearchTags") defer span.End() @@ -631,10 +632,10 @@ func (b *walBlock) SearchTagValues(ctx context.Context, tag string, cb common.Ta return false } - return b.SearchTagValuesV2(ctx, att, cb2, opts) + return b.SearchTagValuesV2(ctx, att, cb2, mcb, opts) } -func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, _ common.SearchOptions) error { +func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb common.TagValuesCallbackV2, mcb common.MetricsCallback, _ common.SearchOptions) error { ctx, span := tracer.Start(ctx, "walBlock.SearchTagsV2") defer span.End() @@ -651,6 +652,7 @@ func (b *walBlock) SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, if err != nil { return fmt.Errorf("error searching block [%s %d]: %w", b.meta.BlockID.String(), i, err) } + mcb(file.r.BytesRead()) // record bytes read } return nil @@ -693,6 +695,7 @@ func (b *walBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, _ c Results: &mergeSpansetIterator{ iters: iters, }, + // FIXME: can this be simplified with the common.MetadataCallback?? and Metrics Collector?? 
Bytes: func() uint64 { // read value when callback is called var totalBytesRead uint64 @@ -704,7 +707,7 @@ func (b *walBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, _ c }, nil } -func (b *walBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, opts common.SearchOptions) error { +func (b *walBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { ctx, span := tracer.Start(ctx, "walBlock.FetchTagValues") defer span.End() @@ -719,7 +722,7 @@ func (b *walBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValue } if len(req.Conditions) <= 1 || mingledConditions { // Last check. No conditions, use old path. It's much faster. - return b.SearchTagValuesV2(ctx, req.TagName, common.TagValuesCallbackV2(cb), common.DefaultSearchOptions()) + return b.SearchTagValuesV2(ctx, req.TagName, common.TagValuesCallbackV2(cb), mcb, common.DefaultSearchOptions()) } blockFlushes := b.readFlushes() @@ -757,18 +760,20 @@ func (b *walBlock) FetchTagValues(ctx context.Context, req traceql.FetchTagValue v := oe.Value.(traceql.Static) if cb(v) { iter.Close() - return nil // We have enough values + mcb(file.r.BytesRead()) // record bytes read + return nil // We have enough values } } } iter.Close() + mcb(file.r.BytesRead()) // record bytes read } // combine iters? 
return nil } -func (b *walBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, opts common.SearchOptions) error { +func (b *walBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { ctx, span := tracer.Start(ctx, "walBlock.FetchTagNames") defer span.End() @@ -785,7 +790,7 @@ func (b *walBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsReque if len(req.Conditions) < 1 || mingledConditions { return b.SearchTags(ctx, req.Scope, func(t string, scope traceql.AttributeScope) { cb(t, scope) - }, opts) + }, mcb, opts) } blockFlushes := b.readFlushes() @@ -819,11 +824,13 @@ func (b *walBlock) FetchTagNames(ctx context.Context, req traceql.FetchTagsReque for _, oe := range res.OtherEntries { if cb(oe.Key, oe.Value.(traceql.AttributeScope)) { iter.Close() - return nil // We have enough values + mcb(file.r.BytesRead()) // record bytes read + return nil // We have enough values } } } iter.Close() + mcb(file.r.BytesRead()) // record bytes read // add well known tagNamesForSpecialColumns(req.Scope, file.parquetFile, b.meta.DedicatedColumns, cb) diff --git a/tempodb/encoding/vparquet4/wal_block_test.go b/tempodb/encoding/vparquet4/wal_block_test.go index 50e86d1bb8d..5502333f074 100644 --- a/tempodb/encoding/vparquet4/wal_block_test.go +++ b/tempodb/encoding/vparquet4/wal_block_test.go @@ -9,6 +9,7 @@ import ( "github.com/gogo/protobuf/proto" "github.com/google/uuid" + "github.com/grafana/tempo/pkg/collector" "github.com/grafana/tempo/pkg/model" "github.com/grafana/tempo/pkg/model/trace" "github.com/grafana/tempo/pkg/tempopb" @@ -372,11 +373,12 @@ func BenchmarkWalSearchTagValues(b *testing.B) { cb := func(_ string) bool { return true } + mc := collector.NewMetricsCollector() for _, t := range tags { b.Run(t, func(b *testing.B) { for i := 0; i < b.N; i++ { - err := w.SearchTagValues(context.TODO(), t, cb, 
common.DefaultSearchOptions()) + err := w.SearchTagValues(context.TODO(), t, cb, mc.Add, common.DefaultSearchOptions()) require.NoError(b, err) } }) diff --git a/tempodb/tempodb.go b/tempodb/tempodb.go index d934634e15b..86ceebed9bc 100644 --- a/tempodb/tempodb.go +++ b/tempodb/tempodb.go @@ -83,12 +83,13 @@ type Reader interface { Find(ctx context.Context, tenantID string, id common.ID, blockStart string, blockEnd string, timeStart int64, timeEnd int64, opts common.SearchOptions) ([]*tempopb.Trace, []error, error) Search(ctx context.Context, meta *backend.BlockMeta, req *tempopb.SearchRequest, opts common.SearchOptions) (*tempopb.SearchResponse, error) SearchTags(ctx context.Context, meta *backend.BlockMeta, scope string, opts common.SearchOptions) (*tempopb.SearchTagsV2Response, error) - SearchTagValues(ctx context.Context, meta *backend.BlockMeta, tag string, opts common.SearchOptions) ([]string, error) + SearchTagValues(ctx context.Context, meta *backend.BlockMeta, tag string, opts common.SearchOptions) (*tempopb.SearchTagValuesResponse, error) SearchTagValuesV2(ctx context.Context, meta *backend.BlockMeta, req *tempopb.SearchTagValuesRequest, opts common.SearchOptions) (*tempopb.SearchTagValuesV2Response, error) + // TODO(suraj): use common.MetricsCallback in Fetch and remove the Bytes callback from traceql.FetchSpansResponse Fetch(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchSpansRequest, opts common.SearchOptions) (traceql.FetchSpansResponse, error) - FetchTagValues(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, opts common.SearchOptions) error - FetchTagNames(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, opts common.SearchOptions) error + FetchTagValues(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, mcb common.MetricsCallback, opts 
common.SearchOptions) error + FetchTagNames(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error BlockMetas(tenantID string) []*backend.BlockMeta EnablePolling(ctx context.Context, sharder blocklist.JobSharder) @@ -338,7 +339,7 @@ func (rw *readerWriter) Find(ctx context.Context, tenantID string, id common.ID, return nil, fmt.Errorf("error finding trace by id, blockID: %s: %w", meta.BlockID.String(), err) } - level.Info(logger).Log("msg", "searching for trace in block", "findTraceID", hex.EncodeToString(id), "block", meta.BlockID, "found", foundObject != nil) + level.Debug(logger).Log("msg", "searching for trace in block", "findTraceID", hex.EncodeToString(id), "block", meta.BlockID, "found", foundObject != nil) return foundObject, nil }) @@ -381,11 +382,12 @@ func (rw *readerWriter) SearchTags(ctx context.Context, meta *backend.BlockMeta, } distinctValues := collector.NewScopedDistinctString(0) // todo: propagate limit? 
+ mc := collector.NewMetricsCollector() rw.cfg.Search.ApplyToOptions(&opts) err = block.SearchTags(ctx, attributeScope, func(s string, scope traceql.AttributeScope) { distinctValues.Collect(scope.String(), s) - }, opts) + }, mc.Add, opts) if err != nil { return nil, err } @@ -393,7 +395,8 @@ func (rw *readerWriter) SearchTags(ctx context.Context, meta *backend.BlockMeta, // build response collected := distinctValues.Strings() resp := &tempopb.SearchTagsV2Response{ - Scopes: make([]*tempopb.SearchTagsV2Scope, 0, len(collected)), + Scopes: make([]*tempopb.SearchTagsV2Scope, 0, len(collected)), + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, } for scope, vals := range collected { resp.Scopes = append(resp.Scopes, &tempopb.SearchTagsV2Scope{ @@ -405,17 +408,21 @@ func (rw *readerWriter) SearchTags(ctx context.Context, meta *backend.BlockMeta, return resp, nil } -func (rw *readerWriter) SearchTagValues(ctx context.Context, meta *backend.BlockMeta, tag string, opts common.SearchOptions) ([]string, error) { +func (rw *readerWriter) SearchTagValues(ctx context.Context, meta *backend.BlockMeta, tag string, opts common.SearchOptions) (response *tempopb.SearchTagValuesResponse, err error) { block, err := encoding.OpenBlock(meta, rw.r) if err != nil { - return nil, err + return &tempopb.SearchTagValuesResponse{}, err } dv := collector.NewDistinctString(0) + mc := collector.NewMetricsCollector() rw.cfg.Search.ApplyToOptions(&opts) - err = block.SearchTagValues(ctx, tag, dv.Collect, opts) + err = block.SearchTagValues(ctx, tag, dv.Collect, mc.Add, opts) - return dv.Strings(), err + return &tempopb.SearchTagValuesResponse{ + TagValues: dv.Strings(), + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, + }, err } func (rw *readerWriter) SearchTagValuesV2(ctx context.Context, meta *backend.BlockMeta, req *tempopb.SearchTagValuesRequest, opts common.SearchOptions) (*tempopb.SearchTagValuesV2Response, error) { @@ -430,13 +437,16 @@ func (rw 
*readerWriter) SearchTagValuesV2(ctx context.Context, meta *backend.Blo } dv := collector.NewDistinctValue[tempopb.TagValue](0, func(v tempopb.TagValue) int { return len(v.Type) + len(v.Value) }) + mc := collector.NewMetricsCollector() rw.cfg.Search.ApplyToOptions(&opts) - err = block.SearchTagValuesV2(ctx, tag, traceql.MakeCollectTagValueFunc(dv.Collect), opts) + err = block.SearchTagValuesV2(ctx, tag, traceql.MakeCollectTagValueFunc(dv.Collect), mc.Add, opts) if err != nil { return nil, err } - resp := &tempopb.SearchTagValuesV2Response{} + resp := &tempopb.SearchTagValuesV2Response{ + Metrics: &tempopb.MetadataMetrics{InspectedBytes: mc.TotalValue()}, + } for _, v := range dv.Values() { v2 := v resp.TagValues = append(resp.TagValues, &v2) @@ -456,24 +466,24 @@ func (rw *readerWriter) Fetch(ctx context.Context, meta *backend.BlockMeta, req return block.Fetch(ctx, req, opts) } -func (rw *readerWriter) FetchTagValues(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, opts common.SearchOptions) error { +func (rw *readerWriter) FetchTagValues(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { block, err := encoding.OpenBlock(meta, rw.r) if err != nil { return err } rw.cfg.Search.ApplyToOptions(&opts) - return block.FetchTagValues(ctx, req, cb, opts) + return block.FetchTagValues(ctx, req, cb, mcb, opts) } -func (rw *readerWriter) FetchTagNames(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, opts common.SearchOptions) error { +func (rw *readerWriter) FetchTagNames(ctx context.Context, meta *backend.BlockMeta, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback, mcb common.MetricsCallback, opts common.SearchOptions) error { block, err := encoding.OpenBlock(meta, rw.r) if err != nil { return err } 
rw.cfg.Search.ApplyToOptions(&opts) - return block.FetchTagNames(ctx, req, cb, opts) + return block.FetchTagNames(ctx, req, cb, mcb, opts) } func (rw *readerWriter) Shutdown() { diff --git a/tempodb/tempodb_search_test.go b/tempodb/tempodb_search_test.go index 5c93c26975d..58d72e8ba08 100644 --- a/tempodb/tempodb_search_test.go +++ b/tempodb/tempodb_search_test.go @@ -1383,11 +1383,12 @@ func tagValuesRunner(t *testing.T, _ *tempopb.Trace, _ *tempopb.TraceSearchMetad for _, tc := range tcs { t.Run(tc.name, func(t *testing.T) { + valueCollector := collector.NewDistinctValue[tempopb.TagValue](0, func(_ tempopb.TagValue) int { return 0 }) + mc := collector.NewMetricsCollector() fetcher := traceql.NewTagValuesFetcherWrapper(func(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback) error { - return bb.FetchTagValues(ctx, req, cb, common.DefaultSearchOptions()) + return bb.FetchTagValues(ctx, req, cb, mc.Add, common.DefaultSearchOptions()) }) - valueCollector := collector.NewDistinctValue[tempopb.TagValue](0, func(_ tempopb.TagValue) int { return 0 }) err := e.ExecuteTagValues(ctx, tc.tag, tc.query, traceql.MakeCollectTagValueFunc(valueCollector.Collect), fetcher) if errors.Is(err, common.ErrUnsupported) { return @@ -1404,6 +1405,8 @@ func tagValuesRunner(t *testing.T, _ *tempopb.Trace, _ *tempopb.TraceSearchMetad }) require.Equal(t, expected, actual) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) }) } } @@ -1451,14 +1454,14 @@ func tagNamesRunner(t *testing.T, _ *tempopb.Trace, _ *tempopb.TraceSearchMetada for _, tc := range tcs { t.Run(tc.name, func(t *testing.T) { + mc := collector.NewMetricsCollector() fetcher := traceql.NewTagNamesFetcherWrapper(func(ctx context.Context, req traceql.FetchTagsRequest, cb traceql.FetchTagsCallback) error { - return bb.FetchTagNames(ctx, req, cb, common.DefaultSearchOptions()) + return bb.FetchTagNames(ctx, req, cb, mc.Add, 
common.DefaultSearchOptions()) }) valueCollector := collector.NewScopedDistinctString(0) err := e.ExecuteTagNames(ctx, traceql.AttributeScopeFromString(tc.scope), tc.query, func(tag string, scope traceql.AttributeScope) bool { - valueCollector.Collect(scope.String(), tag) - return valueCollector.Exceeded() + return valueCollector.Collect(scope.String(), tag) }, fetcher) if errors.Is(err, common.ErrUnsupported) { return @@ -1482,6 +1485,8 @@ func tagNamesRunner(t *testing.T, _ *tempopb.Trace, _ *tempopb.TraceSearchMetada slices.Sort(expected) require.Equal(t, expected, actual, "key: %s", k) } + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) }) } } @@ -2299,21 +2304,23 @@ func TestSearchForTagsAndTagValues(t *testing.T) { sort.Strings(actualTags) assert.Equal(t, expectedTags, actualTags) - values, err := r.SearchTagValues(context.Background(), block.BlockMeta(), "service.name", common.DefaultSearchOptions()) + respValues, err := r.SearchTagValues(context.Background(), block.BlockMeta(), "service.name", common.DefaultSearchOptions()) + require.NotZero(t, respValues.Metrics.InspectedBytes) require.NoError(t, err) expectedTagsValues := []string{"test-service", "test-service-2"} sort.Strings(expectedTagsValues) - sort.Strings(values) - assert.Equal(t, expectedTagsValues, values) + sort.Strings(respValues.TagValues) + assert.Equal(t, expectedTagsValues, respValues.TagValues) - values, err = r.SearchTagValues(context.Background(), block.BlockMeta(), "intTag", common.DefaultSearchOptions()) + respValues, err = r.SearchTagValues(context.Background(), block.BlockMeta(), "intTag", common.DefaultSearchOptions()) + require.NotZero(t, respValues.Metrics.InspectedBytes) require.NoError(t, err) expectedTagsValues = []string{"2", "3"} sort.Strings(expectedTagsValues) - sort.Strings(values) - assert.Equal(t, expectedTagsValues, values) + sort.Strings(respValues.TagValues) + assert.Equal(t, expectedTagsValues, respValues.TagValues) 
tagValues, err := r.SearchTagValuesV2(context.Background(), block.BlockMeta(), &tempopb.SearchTagValuesRequest{ TagName: ".service.name", @@ -2362,11 +2369,13 @@ func TestSearchForTagsAndTagValues(t *testing.T) { sort.SliceStable(tagValues.TagValues, func(i, j int) bool { return tagValues.TagValues[i].Value < tagValues.TagValues[j].Value }) - assert.Equal(t, expected, tagValues.TagValues) + require.Equal(t, expected, tagValues.TagValues) + require.NotZero(t, tagValues.Metrics.InspectedBytes) valueCollector := collector.NewDistinctValue[tempopb.TagValue](0, func(_ tempopb.TagValue) int { return 0 }) + mc := collector.NewMetricsCollector() f := traceql.NewTagValuesFetcherWrapper(func(ctx context.Context, req traceql.FetchTagValuesRequest, cb traceql.FetchTagValuesCallback) error { - return r.FetchTagValues(ctx, block.BlockMeta(), req, cb, common.DefaultSearchOptions()) + return r.FetchTagValues(ctx, block.BlockMeta(), req, cb, mc.Add, common.DefaultSearchOptions()) }) tag, err := traceql.ParseIdentifier("span.intTag") @@ -2376,5 +2385,7 @@ func TestSearchForTagsAndTagValues(t *testing.T) { require.NoError(t, err) actual := valueCollector.Values() - assert.Equal(t, []tempopb.TagValue{{Type: "int", Value: "3"}}, actual) + require.Equal(t, []tempopb.TagValue{{Type: "int", Value: "3"}}, actual) + // test that callback is recording bytes read + require.Greater(t, mc.TotalValue(), uint64(100)) } diff --git a/tools/version-tag.sh b/tools/version-tag.sh new file mode 100755 index 00000000000..6dd3208f1e0 --- /dev/null +++ b/tools/version-tag.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset +set -o pipefail + +REPO_ROOT=$(git rev-parse --show-toplevel) + +is_valid_semver() { + local version=$1 + # regex taken from https://semver.org/ + if [[ $version =~ ^v(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$ ]] ;then + return 1 
+ else + return 0 + fi +} + + +VERSION=$(git describe --tags `git rev-list --tags --max-count=1`) +if is_valid_semver "$VERSION"; then + echo "$VERSION" + exit 0 +fi + + +source "${REPO_ROOT}/tools/image-tag" \ No newline at end of file diff --git a/vendor/github.com/andybalholm/brotli/encoder.go b/vendor/github.com/andybalholm/brotli/encoder.go index 650d1e42b49..1928382596e 100644 --- a/vendor/github.com/andybalholm/brotli/encoder.go +++ b/vendor/github.com/andybalholm/brotli/encoder.go @@ -21,6 +21,15 @@ func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, la e.wroteHeader = true } + if len(src) == 0 { + if lastBlock { + e.bw.writeBits(2, 3) // islast + isempty + e.bw.jumpToByteBoundary() + return e.bw.dst + } + return dst + } + var literalHisto [256]uint32 var commandHisto [704]uint32 var distanceHisto [64]uint32 diff --git a/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go b/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go index 37ed8e13340..507d1cae64c 100644 --- a/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go +++ b/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go @@ -32,14 +32,3 @@ func (e *matchEmitter) emit(m absoluteMatch) { }) e.NextEmit = m.End } - -// trim shortens m if it extends past maxEnd. Then if the length is at least -// minLength, the match is emitted. 
-func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) { - if m.End > maxEnd { - m.End = maxEnd - } - if m.End-m.Start >= minLength { - e.emit(m) - } -} diff --git a/vendor/github.com/andybalholm/brotli/matchfinder/m4.go b/vendor/github.com/andybalholm/brotli/matchfinder/m4.go index 5b2acba2e14..818947255df 100644 --- a/vendor/github.com/andybalholm/brotli/matchfinder/m4.go +++ b/vendor/github.com/andybalholm/brotli/matchfinder/m4.go @@ -56,7 +56,7 @@ func (q *M4) Reset() { } func (q *M4) score(m absoluteMatch) int { - return (m.End-m.Start)*256 + bits.LeadingZeros32(uint32(m.Start-m.Match))*q.DistanceBitCost + return (m.End-m.Start)*256 + (bits.LeadingZeros32(uint32(m.Start-m.Match))-32)*q.DistanceBitCost } func (q *M4) FindMatches(dst []Match, src []byte) []Match { @@ -112,7 +112,12 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { // We have found some matches, and we're far enough along that we probably // won't find overlapping matches, so we might as well emit them. if matches[1] != (absoluteMatch{}) { - e.trim(matches[1], matches[0].Start, q.MinLength) + if matches[1].End > matches[0].Start { + matches[1].End = matches[0].Start + } + if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 { + e.emit(matches[1]) + } } e.emit(matches[0]) matches = [3]absoluteMatch{} @@ -139,12 +144,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { // Look for a match. 
var currentMatch absoluteMatch - if i-candidate != matches[0].Start-matches[0].Match { - if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) { - m := extendMatch2(src, i, candidate, e.NextEmit) - if m.End-m.Start > q.MinLength { - currentMatch = m - } + if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) { + m := extendMatch2(src, i, candidate, e.NextEmit) + if m.End-m.Start > q.MinLength && q.score(m) > 0 { + currentMatch = m } } @@ -157,12 +160,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { if candidate <= 0 || i-candidate > q.MaxDistance { break } - if i-candidate != matches[0].Start-matches[0].Match { - if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) { - m := extendMatch2(src, i, candidate, e.NextEmit) - if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) { - currentMatch = m - } + if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) { + m := extendMatch2(src, i, candidate, e.NextEmit) + if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) { + currentMatch = m } } } @@ -217,14 +218,24 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { default: // Emit the first match, shortening it if necessary to avoid overlap with the second. - e.trim(matches[2], matches[1].Start, q.MinLength) + if matches[2].End > matches[1].Start { + matches[2].End = matches[1].Start + } + if matches[2].End-matches[2].Start >= q.MinLength && q.score(matches[2]) > 0 { + e.emit(matches[2]) + } matches[2] = absoluteMatch{} } } // We've found all the matches now; emit the remaining ones. 
if matches[1] != (absoluteMatch{}) { - e.trim(matches[1], matches[0].Start, q.MinLength) + if matches[1].End > matches[0].Start { + matches[1].End = matches[0].Start + } + if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 { + e.emit(matches[1]) + } } if matches[0] != (absoluteMatch{}) { e.emit(matches[0]) diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index a22953805c6..4528059ca68 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -1,5 +1,5 @@ -# This is an example goreleaser.yaml file with some sane defaults. -# Make sure to check the documentation at http://goreleaser.com +version: 2 + before: hooks: - ./gen.sh @@ -99,7 +99,7 @@ archives: checksum: name_template: 'checksums.txt' snapshot: - name_template: "{{ .Tag }}-next" + version_template: "{{ .Tag }}-next" changelog: sort: asc filters: diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 05c7359e481..de264c85a5a 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,27 @@ This package provides various compression algorithms. # changelog +* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10) + * gzhttp: Add TransportAlwaysDecompress option. 
https://github.com/klauspost/compress/pull/978 + * gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002 + * s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982 + * zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007 + * flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996 + +* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9) + * s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949 + * flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963 + * Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971 + * zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951 + +* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8) + * zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885 + * zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938 + +* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7) + * s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927 + * s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930 + * Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6) * zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923 * s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925 @@ -81,7 +102,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp * zstd: Various minor improvements by 
@greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795 * s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779 * s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780 - * gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 + * gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 * Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1) * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776 @@ -136,7 +157,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp * zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649 * Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651 * flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656 - * zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657 + * zstd: Improve "better" compression https://github.com/klauspost/compress/pull/657 * s2: Improve "best" compression https://github.com/klauspost/compress/pull/658 * s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635 * s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646 @@ -339,7 +360,7 @@ While the release has been extensively tested, it is recommended to testing when * s2: Fix binaries. * Feb 25, 2021 (v1.11.8) - * s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended. + * s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended. * s2: Add AMD64 assembly for better mode. 25-50% faster. 
[#315](https://github.com/klauspost/compress/pull/315) * s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322) * zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314) @@ -518,7 +539,7 @@ While the release has been extensively tested, it is recommended to testing when * Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster. * Feb 19, 2016: Handle small payloads faster in level 1-3. * Feb 19, 2016: Added faster level 2 + 3 compression modes. -* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5. +* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5. * Feb 14, 2016: Snappy: Merge upstream changes. * Feb 14, 2016: Snappy: Fix aggressive skipping. * Feb 14, 2016: Snappy: Update benchmark. diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 66d1657d2c6..af53fb860cc 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -861,7 +861,7 @@ func (d *compressor) reset(w io.Writer) { } switch d.compressionLevel.chain { case 0: - // level was NoCompression or ConstantCompresssion. + // level was NoCompression or ConstantCompression. 
d.windowEnd = 0 default: s := d.state diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 2f410d64f5a..0d7b437f1c6 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -298,6 +298,14 @@ const ( huffmanGenericReader ) +// flushMode tells decompressor when to return data +type flushMode uint8 + +const ( + syncFlush flushMode = iota // return data after sync flush block + partialFlush // return data after each block +) + // Decompress state. type decompressor struct { // Input source. @@ -332,6 +340,8 @@ type decompressor struct { nb uint final bool + + flushMode flushMode } func (f *decompressor) nextBlock() { @@ -618,7 +628,10 @@ func (f *decompressor) dataBlock() { } if n == 0 { - f.toRead = f.dict.readFlush() + if f.flushMode == syncFlush { + f.toRead = f.dict.readFlush() + } + f.finishBlock() return } @@ -657,8 +670,12 @@ func (f *decompressor) finishBlock() { if f.dict.availRead() > 0 { f.toRead = f.dict.readFlush() } + f.err = io.EOF + } else if f.flushMode == partialFlush && f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() } + f.step = nextBlock } @@ -789,15 +806,25 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { return nil } -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement io.ByteReader, -// the decompressor may read more data than necessary from r. -// It is the caller's responsibility to call Close on the ReadCloser -// when finished reading. -// -// The ReadCloser returned by NewReader also implements Resetter. 
-func NewReader(r io.Reader) io.ReadCloser { +type ReaderOpt func(*decompressor) + +// WithPartialBlock tells decompressor to return after each block, +// so it can read data written with partial flush +func WithPartialBlock() ReaderOpt { + return func(f *decompressor) { + f.flushMode = partialFlush + } +} + +// WithDict initializes the reader with a preset dictionary +func WithDict(dict []byte) ReaderOpt { + return func(f *decompressor) { + f.dict.init(maxMatchOffset, dict) + } +} + +// NewReaderOpts returns new reader with provided options +func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser { fixedHuffmanDecoderInit() var f decompressor @@ -806,9 +833,26 @@ func NewReader(r io.Reader) io.ReadCloser { f.codebits = new([numCodes]int) f.step = nextBlock f.dict.init(maxMatchOffset, nil) + + for _, opt := range opts { + opt(&f) + } + return &f } +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + return NewReaderOpts(r) +} + // NewReaderDict is like NewReader but initializes the reader // with a preset dictionary. The returned Reader behaves as if // the uncompressed data stream started with the given dictionary, @@ -817,13 +861,5 @@ func NewReader(r io.Reader) io.ReadCloser { // // The ReadCloser returned by NewReader also implements Resetter. 
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.r = makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = nextBlock - f.dict.init(maxMatchOffset, dict) - return &f + return NewReaderOpts(r, WithDict(dict)) } diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index cc05d0f7ea9..0c7dd4ffef9 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -15,7 +15,7 @@ const ( // It is possible, but by no way guaranteed that corrupt data will // return an error. // It is up to the caller to verify integrity of the returned data. -// Use a predefined Scrach to set maximum acceptable output size. +// Use a predefined Scratch to set maximum acceptable output size. func Decompress(b []byte, s *Scratch) ([]byte, error) { s, err := s.prepare(b) if err != nil { diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress.go b/vendor/github.com/klauspost/compress/gzhttp/compress.go index 289ae3e2ee8..52e3077ec4e 100644 --- a/vendor/github.com/klauspost/compress/gzhttp/compress.go +++ b/vendor/github.com/klauspost/compress/gzhttp/compress.go @@ -131,15 +131,15 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) { // If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue. if cl >= w.minSize || len(w.buf) >= w.minSize { - // If a Content-Type wasn't specified, infer it from the current buffer. - if ct == "" { + // If a Content-Type wasn't specified, infer it from the current buffer when the response has a body. 
+ if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 { ct = http.DetectContentType(w.buf) - } - // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs) - // Set the header only if the key does not exist - if _, ok := hdr[contentType]; w.setContentType && !ok { - hdr.Set(contentType, ct) + // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs) + // Set the header only if the key does not exist + if _, ok := hdr[contentType]; w.setContentType && !ok { + hdr.Set(contentType, ct) + } } // If the Content-Type is acceptable to GZIP, initialize the GZIP writer. @@ -306,7 +306,7 @@ func (w *GzipResponseWriter) startPlain() error { func (w *GzipResponseWriter) WriteHeader(code int) { // Handle informational headers // This is gated to not forward 1xx responses on builds prior to go1.20. - if shouldWrite1xxResponses() && code >= 100 && code <= 199 { + if code >= 100 && code <= 199 { w.ResponseWriter.WriteHeader(code) return } @@ -324,6 +324,20 @@ func (w *GzipResponseWriter) init() { w.gw = w.gwFactory.New(w.ResponseWriter, w.level) } +// bodyAllowedForStatus reports whether a given response status code +// permits a body. See RFC 7230, section 3.3. +func bodyAllowedForStatus(status int) bool { + switch { + case status >= 100 && status <= 199: + return false + case status == 204: + return false + case status == 304: + return false + } + return true +} + // Close will close the gzip.Writer and will put it back in the gzipWriterPool. func (w *GzipResponseWriter) Close() error { if w.ignore { @@ -335,7 +349,9 @@ func (w *GzipResponseWriter) Close() error { ce = w.Header().Get(contentEncoding) cr = w.Header().Get(contentRange) ) - if ct == "" { + + // Detects the response content-type when it does not exist and the response has a body. 
+ if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 { ct = http.DetectContentType(w.buf) // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs) @@ -379,7 +395,8 @@ func (w *GzipResponseWriter) Flush() { cr = w.Header().Get(contentRange) ) - if ct == "" { + // Detects the response content-type when it does not exist and the response has a body. + if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 { ct = http.DetectContentType(w.buf) // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs) @@ -464,6 +481,11 @@ func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) { return func(h http.Handler) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { w.Header().Add(vary, acceptEncoding) + if c.allowCompressedRequests && contentGzip(r) { + r.Header.Del(contentEncoding) + r.Body = &gzipReader{body: r.Body} + } + if acceptsGzip(r) { gw := grwPool.Get().(*GzipResponseWriter) *gw = GzipResponseWriter{ @@ -536,17 +558,18 @@ func (pct parsedContentType) equals(mediaType string, params map[string]string) // Used for functional configuration. type config struct { - minSize int - level int - writer writer.GzipWriterFactory - contentTypes func(ct string) bool - keepAcceptRanges bool - setContentType bool - suffixETag string - dropETag bool - jitterBuffer int - randomJitter string - sha256Jitter bool + minSize int + level int + writer writer.GzipWriterFactory + contentTypes func(ct string) bool + keepAcceptRanges bool + setContentType bool + suffixETag string + dropETag bool + jitterBuffer int + randomJitter string + sha256Jitter bool + allowCompressedRequests bool } func (c *config) validate() error { @@ -579,6 +602,15 @@ func MinSize(size int) option { } } +// AllowCompressedRequests will enable or disable RFC 7694 compressed requests. +// By default this is Disabled. 
+// See https://datatracker.ietf.org/doc/html/rfc7694 +func AllowCompressedRequests(b bool) option { + return func(c *config) { + c.allowCompressedRequests = b + } +} + // CompressionLevel sets the compression level func CompressionLevel(level int) option { return func(c *config) { @@ -752,6 +784,12 @@ func RandomJitter(n, buffer int, paranoid bool) option { } } +// contentGzip returns true if the given HTTP request indicates that it gzipped. +func contentGzip(r *http.Request) bool { + // See more detail in `acceptsGzip` + return r.Method != http.MethodHead && r.Body != nil && parseEncodingGzip(r.Header.Get(contentEncoding)) > 0 +} + // acceptsGzip returns true if the given HTTP request indicates that it will // accept a gzipped response. func acceptsGzip(r *http.Request) bool { diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go b/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go deleted file mode 100644 index 97fc25acbc9..00000000000 --- a/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go +++ /dev/null @@ -1,9 +0,0 @@ -//go:build !go1.20 -// +build !go1.20 - -package gzhttp - -// shouldWrite1xxResponses indicates whether the current build supports writes of 1xx status codes. -func shouldWrite1xxResponses() bool { - return false -} diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go b/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go deleted file mode 100644 index 2b65f67c795..00000000000 --- a/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go +++ /dev/null @@ -1,9 +0,0 @@ -//go:build go1.20 -// +build go1.20 - -package gzhttp - -// shouldWrite1xxResponses indicates whether the current build supports writes of 1xx status codes. 
-func shouldWrite1xxResponses() bool { - return true -} diff --git a/vendor/github.com/klauspost/compress/gzhttp/transport.go b/vendor/github.com/klauspost/compress/gzhttp/transport.go index 623aea2ed8a..3914a06e013 100644 --- a/vendor/github.com/klauspost/compress/gzhttp/transport.go +++ b/vendor/github.com/klauspost/compress/gzhttp/transport.go @@ -61,10 +61,21 @@ func TransportCustomEval(fn func(header http.Header) bool) transportOption { } } +// TransportAlwaysDecompress will always decompress the response, +// regardless of whether we requested it or not. +// Default is false, which will pass compressed data through +// if we did not request compression. +func TransportAlwaysDecompress(enabled bool) transportOption { + return func(c *gzRoundtripper) { + c.alwaysDecomp = enabled + } +} + type gzRoundtripper struct { parent http.RoundTripper acceptEncoding string withZstd, withGzip bool + alwaysDecomp bool customEval func(header http.Header) bool } @@ -90,15 +101,19 @@ func (g *gzRoundtripper) RoundTrip(req *http.Request) (*http.Response, error) { } resp, err := g.parent.RoundTrip(req) - if err != nil || !requestedComp { + if err != nil { return resp, err } - decompress := false + decompress := g.alwaysDecomp if g.customEval != nil { if !g.customEval(resp.Header) { return resp, nil } decompress = true + } else { + if !requestedComp && !g.alwaysDecomp { + return resp, nil + } } // Decompress if (decompress || g.withGzip) && asciiEqualFold(resp.Header.Get("Content-Encoding"), "gzip") { diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index 54bd08b25c0..0f56b02d747 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -1136,7 +1136,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 0 { - fmt.Fprintf(w, "%d errros in base, stopping\n", errs) + fmt.Fprintf(w, "%d errors in base, 
stopping\n", errs) continue } // Ensure that all combinations are covered. @@ -1152,7 +1152,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 20 { - fmt.Fprintf(w, "%d errros, stopping\n", errs) + fmt.Fprintf(w, "%d errors, stopping\n", errs) break } } diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index 0c9088adfee..20b802270a7 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -9,6 +9,9 @@ import ( "encoding/binary" "math" "math/bits" + "sync" + + "github.com/klauspost/compress/internal/race" ) // Encode returns the encoded form of src. The returned slice may be a sub- @@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte { return dst[:d] } +var estblockPool [2]sync.Pool + // EstimateBlockSize will perform a very fast compression // without outputting the result and return the compressed output size. // The function returns -1 if no improvement could be achieved. 
@@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) { return -1 } if len(src) <= 1024 { - d = calcBlockSizeSmall(src) + const sz, pool = 2048, 0 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSizeSmall(src, tmp) } else { - d = calcBlockSize(src) + const sz, pool = 32768, 1 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSize(src, tmp) } if d == 0 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go index 4f45206a4ef..7aadd255fe3 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go @@ -3,10 +3,16 @@ package s2 -import "github.com/klauspost/compress/internal/race" +import ( + "sync" + + "github.com/klauspost/compress/internal/race" +) const hasAmd64Asm = true +var encPools [4]sync.Pool + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. 
@@ -29,23 +35,60 @@ func encodeBlock(dst, src []byte) (d int) { ) if len(src) >= 4<<20 { - return encodeBlockAsm(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBlockAsm4MB(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm8B(dst, src, tmp) } +var encBetterPools [5]sync.Pool + // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. 
@@ -68,21 +111,59 @@ func encodeBlockBetter(dst, src []byte) (d int) { ) if len(src) > 4<<20 { - return encodeBetterBlockAsm(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBetterBlockAsm4MB(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -105,22 +186,57 @@ func encodeBlockSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... 
limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeSnappyBlockAsm64K(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -143,20 +259,59 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... 
limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBetterBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm(dst, src, tmp) } + if len(src) >= limit12B { - return encodeSnappyBetterBlockAsm64K(dst, src) + const sz, pool = 294912, 4 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm8B(dst, src, tmp) } diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index 6b393c34d37..dd1c973ca51 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -317,7 +317,7 @@ func matchLen(a []byte, b []byte) int { } // input must be > inputMargin -func 
calcBlockSize(src []byte) (d int) { +func calcBlockSize(src []byte, _ *[32768]byte) (d int) { // Initialize the hash table. const ( tableBits = 13 @@ -503,7 +503,7 @@ emitRemainder: } // length must be > inputMargin. -func calcBlockSizeSmall(src []byte) (d int) { +func calcBlockSizeSmall(src []byte, _ *[2048]byte) (d int) { // Initialize the hash table. const ( tableBits = 9 diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go index 297e41501ba..f43aa815435 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -11,154 +11,154 @@ func _dummy_() // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm(dst []byte, src []byte) int +func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm4MB(dst []byte, src []byte) int +func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm12B(dst []byte, src []byte) int +func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. 
// //go:noescape -func encodeBlockAsm10B(dst []byte, src []byte) int +func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm8B(dst []byte, src []byte) int +func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm(dst []byte, src []byte) int +func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm12B(dst []byte, src []byte) int +func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. 
// //go:noescape -func encodeBetterBlockAsm10B(dst []byte, src []byte) int +func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm8B(dst []byte, src []byte) int +func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm(dst []byte, src []byte) int +func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. 
// //go:noescape -func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. 
// //go:noescape -func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSize(src []byte) int +func calcBlockSize(src []byte, tmp *[32768]byte) int // calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 1024 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSizeSmall(src []byte) int +func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // emitLiteral writes a literal chunk and returns the number of bytes written. 
// diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 2ff5b334017..df9be687be7 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -13,1270 +13,1271 @@ TEXT ·_dummy_(SB), $0 #endif RET -// func encodeBlockAsm(dst []byte, src []byte) int +// func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, 
R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm repeat_extend_back_loop_encodeBlockAsm: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL 
DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeBlockAsm four_bytes_repeat_emit_encodeBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm three_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm two_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm JMP memmove_long_repeat_emit_encodeBlockAsm one_byte_repeat_emit_encodeBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ 
(R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm memmove_long_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R10), 
X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm matchlen_bsf_16repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + 
BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match8_repeat_extend_encodeBlockAsm: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm matchlen_bsf_8_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match4_repeat_extend_encodeBlockAsm: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm JB repeat_extend_forward_end_encodeBlockAsm - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm matchlen_match1_repeat_extend_encodeBlockAsm: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + 
MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_match_repeat_encodeBlockAsm: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm cant_repeat_two_offset_match_repeat_encodeBlockAsm: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_match_repeat_encodeBlockAsm - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_repeat_encodeBlockAsm repeat_five_match_repeat_encodeBlockAsm: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_match_repeat_encodeBlockAsm: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_match_repeat_encodeBlockAsm: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_match_repeat_encodeBlockAsm: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm 
repeat_two_offset_match_repeat_encodeBlockAsm: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_as_copy_encodeBlockAsm: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 
4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm four_bytes_remain_repeat_as_copy_encodeBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_repeat_as_copy_encodeBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - MOVL SI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 
16(R8), R8 + MOVB DI, 1(CX) + MOVL DI, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm 
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm long_offset_short_repeat_as_copy_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + 
MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_short_repeat_as_copy_encodeBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + 
SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm emit_copy_three_repeat_as_copy_encodeBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm no_repeat_found_encodeBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm candidate3_match_encodeBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm candidate2_match_encodeBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm match_extend_back_loop_encodeBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm JMP match_extend_back_loop_encodeBlockAsm match_extend_back_end_encodeBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET 
match_dst_size_check_encodeBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBlockAsm four_bytes_match_emit_encodeBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm three_bytes_match_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm two_bytes_match_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm JMP memmove_long_match_emit_encodeBlockAsm one_byte_match_emit_encodeBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 - 
CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm memmove_long_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA 
emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm: match_nolit_loop_encodeBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - XORQ 8(BX)(R9*1), 
R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm matchlen_bsf_16match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match8_match_nolit_encodeBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm matchlen_bsf_8_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match4_match_nolit_encodeBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm JB match_nolit_end_encodeBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm matchlen_match1_match_nolit_encodeBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm - 
LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy repeat_five_match_nolit_encodeBlockAsm_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm 
repeat_four_match_nolit_encodeBlockAsm_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm four_bytes_remain_match_nolit_encodeBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_match_nolit_encodeBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - MOVL BX, DI - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + MOVL SI, R8 + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, 
R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, 
(CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm long_offset_short_match_nolit_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short 
repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_short_match_nolit_encodeBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP 
match_nolit_emitcopy_end_encodeBlockAsm emit_copy_three_match_nolit_encodeBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm - INCL CX + INCL DX JMP search_loop_encodeBlockAsm emit_remainder_encodeBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm @@ -1286,41 +1287,41 @@ emit_remainder_ok_encodeBlockAsm: JB 
three_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBlockAsm four_bytes_emit_remainder_encodeBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm three_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm two_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm JMP memmove_long_emit_remainder_encodeBlockAsm one_byte_emit_remainder_encodeBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -1336,73 +1337,73 @@ memmove_emit_remainder_encodeBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, 
-4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm memmove_long_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -1416,1199 +1417,1200 @@ 
emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm4MB(dst []byte, src []byte) int +// func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm4MB(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + 
LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm4MB - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm4MB - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ 
repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 4(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 4(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm4MB JMP memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 
+ CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU 
-16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm4MB: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 
8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB matchlen_bsf_16repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match8_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match4_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm4MB - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm4MB JB repeat_extend_forward_end_encodeBlockAsm4MB - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB - LEAL 
2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match1_repeat_extend_encodeBlockAsm4MB: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm4MB: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm4MB - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm4MB - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm4MB - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: - SHLL $0x02, BX - ORL $0x01, BX - MOVW 
BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - 
MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm4MB - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - 
MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI 
+ SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB long_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP 
repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm4MB: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm4MB - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm4MB - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm4MB candidate3_match_encodeBlockAsm4MB: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: - MOVL R8, 24(SP)(R9*4) - INCL CX 
- MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm4MB: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm4MB - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm4MB 
two_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, 
(CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX 
emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB matchlen_bsf_16match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match8_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match4_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x04 + CMPL DI, 
$0x04 JB matchlen_match2_match_nolit_encodeBlockAsm4MB - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm4MB - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm4MB JB match_nolit_end_encodeBlockAsm4MB - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm4MB - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm4MB matchlen_match1_match_nolit_encodeBlockAsm4MB: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm4MB - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm4MB: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm4MB - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB 
repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: - CMPL R9, $0x40 + CMPL R10, 
$0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm4MB - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 - + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 + // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL 
-4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB long_offset_short_match_nolit_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL 
-256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ 
-2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm4MB - INCL CX + INCL DX JMP search_loop_encodeBlockAsm4MB emit_remainder_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm4MB @@ -2618,33 +2620,33 @@ emit_remainder_ok_encodeBlockAsm4MB: JB three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB three_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ 
$0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB two_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -2660,73 +2662,73 @@ memmove_emit_remainder_encodeBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU 
X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB memmove_long_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -2740,967 +2742,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE 
emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm12B(dst []byte, src []byte) int +// func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ (DX)(CX*1), SI - 
MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm12B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ 
emit_literal_done_repeat_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm12B JB three_bytes_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP 
memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA 
emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B matchlen_bsf_16repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match8_repeat_extend_encodeBlockAsm12B: - CMPL R8, 
$0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm12B matchlen_bsf_8_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match4_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm12B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm12B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm12B JB repeat_extend_forward_end_encodeBlockAsm12B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match1_repeat_extend_encodeBlockAsm12B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm12B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + 
CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm12B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm12B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP 
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B long_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), 
SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL 
$0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm12B - MOVQ 
$0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm12B JB three_bytes_match_emit_encodeBlockAsm12B three_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), 
SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, 
R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B matchlen_bsf_16match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 
8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match8_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm12B matchlen_bsf_8_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match4_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm12B JB match_nolit_end_encodeBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm12B matchlen_match1_match_nolit_encodeBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE 
long_offset_short_match_nolit_encodeBlockAsm12B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 
1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B long_offset_short_match_nolit_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, 
(CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBlockAsm12B + two_byte_offset_short_match_nolit_encodeBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + 
SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm12B @@ -3709,26 +3712,26 @@ emit_remainder_ok_encodeBlockAsm12B: JB three_bytes_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -3744,73 +3747,73 @@ memmove_emit_remainder_encodeBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) 
+ MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm12B memmove_long_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA 
emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -3824,967 +3827,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm10B(dst []byte, src []byte) int +// func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + 
DECQ DX JNZ zero_loop_encodeBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: - CMPL SI, DI + 
CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm10B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm10B JB three_bytes_repeat_emit_encodeBlockAsm10B three_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm10B JMP memmove_long_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, 
$0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm10B memmove_long_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU 
-16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 
8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B matchlen_bsf_16repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match8_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm10B matchlen_bsf_8_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match4_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm10B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm10B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm10B JB repeat_extend_forward_end_encodeBlockAsm10B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm10B - LEAL 
2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match1_repeat_extend_encodeBlockAsm10B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm10B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm10B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm10B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm10B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, 
(CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B 
repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B long_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - 
MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm10B candidate3_match_encodeBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm10B 
match_extend_back_loop_encodeBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm10B JB three_bytes_match_emit_encodeBlockAsm10B three_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm10B JMP memmove_long_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + 
CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm10B memmove_long_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU 
(DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm10B: match_nolit_loop_encodeBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB 
matchlen_match8_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B matchlen_bsf_16match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match8_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm10B matchlen_bsf_8_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match4_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm10B JB match_nolit_end_encodeBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE 
matchlen_match1_match_nolit_encodeBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm10B matchlen_match1_match_nolit_encodeBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm10B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B 
repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B long_offset_short_match_nolit_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL 
-4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ 
$0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm10B emit_remainder_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm10B @@ -4793,26 +4797,26 @@ emit_remainder_ok_encodeBlockAsm10B: JB three_bytes_emit_remainder_encodeBlockAsm10B three_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm10B two_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm10B JMP memmove_long_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ 
(CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -4828,73 +4832,73 @@ memmove_emit_remainder_encodeBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, 
-32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm10B memmove_long_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -4908,943 +4912,944 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm8B(dst []byte, src []byte) int +// func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT 
·encodeBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 - JNE 
no_repeat_found_encodeBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 + JNE no_repeat_found_encodeBlockAsm8B + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm8B repeat_extend_back_loop_encodeBlockAsm8B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm8B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm8B JB three_bytes_repeat_emit_encodeBlockAsm8B three_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP 
memmove_long_repeat_emit_encodeBlockAsm8B two_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm8B JMP memmove_long_repeat_emit_encodeBlockAsm8B one_byte_repeat_emit_encodeBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU 
-32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm8B memmove_long_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU 
X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B matchlen_bsf_16repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match8_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm8B matchlen_bsf_8_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match4_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x04 + CMPL R9, 
$0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm8B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm8B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm8B JB repeat_extend_forward_end_encodeBlockAsm8B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match1_repeat_extend_encodeBlockAsm8B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm8B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm8B // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm8B - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm8B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_match_repeat_encodeBlockAsm8B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B 
repeat_two_match_repeat_encodeBlockAsm8B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + 
MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B long_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 
1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B emit_copy_three_repeat_as_copy_encodeBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm8B no_repeat_found_encodeBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm8B candidate3_match_encodeBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm8B candidate2_match_encodeBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm8B match_extend_back_loop_encodeBlockAsm8B: - CMPL CX, SI + 
CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm8B JMP match_extend_back_loop_encodeBlockAsm8B match_extend_back_end_encodeBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm8B JB three_bytes_match_emit_encodeBlockAsm8B three_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm8B two_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm8B JMP memmove_long_match_emit_encodeBlockAsm8B one_byte_match_emit_encodeBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE 
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm8B memmove_long_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + 
MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) - ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm8B: match_nolit_loop_encodeBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB 
matchlen_match8_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B matchlen_bsf_16match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match8_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm8B matchlen_bsf_8_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match4_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm8B JB match_nolit_end_encodeBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE 
matchlen_match1_match_nolit_encodeBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm8B matchlen_match1_match_nolit_encodeBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm8B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + 
MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B long_offset_short_match_nolit_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - 
LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B two_byte_offset_short_match_nolit_encodeBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B emit_copy_three_match_nolit_encodeBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm8B emit_remainder_encodeBlockAsm8B: - MOVQ 
src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm8B @@ -5853,26 +5858,26 @@ emit_remainder_ok_encodeBlockAsm8B: JB three_bytes_emit_remainder_encodeBlockAsm8B three_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm8B two_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm8B JMP memmove_long_emit_remainder_encodeBlockAsm8B one_byte_emit_remainder_encodeBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -5888,73 +5893,73 @@ memmove_emit_remainder_encodeBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 
2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm8B memmove_long_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 
DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -5968,961 +5973,962 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm(dst []byte, src []byte) int +// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 
112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm check_maxskip_ok_encodeBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm 
no_short_found_encodeBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm: - 
CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm matchlen_bsf_16match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match8_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm matchlen_bsf_8_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match4_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm JB 
match_nolit_end_encodeBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm matchlen_match1_match_nolit_encodeBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL 
SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ 
R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ 
$0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB 
repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - MOVL DI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + MOVL R8, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB 
repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm long_offset_short_match_nolit_encodeBetterBlockAsm: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, 
AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL 
-4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB 
one_byte_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_repeat_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong 
- MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, 
$0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, 
CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - CMPQ AX, (SP) - JB match_nolit_dst_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) - RET - -match_nolit_dst_ok_encodeBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + CMPQ CX, (SP) + JB match_nolit_dst_ok_encodeBetterBlockAsm + MOVQ $0x00000000, ret+56(FP) + RET + +match_nolit_dst_ok_encodeBetterBlockAsm: + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, 
R9 index_loop_encodeBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm @@ -6932,41 +6938,41 @@ emit_remainder_ok_encodeBetterBlockAsm: JB three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm four_bytes_emit_remainder_encodeBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm three_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB 
$0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm two_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -6982,73 +6988,73 @@ memmove_emit_remainder_encodeBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - 
MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm memmove_long_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -7062,903 +7068,904 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE 
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +// func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm4MB - LEAL 100(CX), BX + 
LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm4MB check_maxskip_ok_encodeBetterBlockAsm4MB: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm4MB: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm4MB - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm4MB no_short_found_encodeBetterBlockAsm4MB: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm4MB - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI 
candidate_match_encodeBetterBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm4MB match_extend_back_loop_encodeBetterBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm4MB JMP match_extend_back_loop_encodeBetterBlockAsm4MB match_extend_back_end_encodeBetterBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm4MB: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ 
R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB JB match_nolit_end_encodeBetterBlockAsm4MB - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm4MB: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL 
SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm4MB - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm4MB - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm4MB match_length_ok_encodeBetterBlockAsm4MB: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB three_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB two_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_encodeBetterBlockAsm4MB one_byte_match_emit_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // 
genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU 
X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB memmove_long_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, 
-16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) 
- MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE 
repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB 
R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB long_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - SHLL 
$0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB emit_copy_three_match_nolit_encodeBetterBlockAsm4MB: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB match_is_repeat_encodeBetterBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, 
$0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - 
MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA 
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB 
repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm4MB: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ 
(DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm4MB: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm4MB - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET 
emit_remainder_ok_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm4MB @@ -7968,33 +7975,33 @@ emit_remainder_ok_encodeBetterBlockAsm4MB: JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB three_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB two_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm4MB JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB one_byte_emit_remainder_encodeBetterBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8010,73 +8017,73 @@ memmove_emit_remainder_encodeBetterBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB memmove_long_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 
16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -8090,756 +8097,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 
64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm12B 
no_short_found_encodeBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match8_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match4_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 
matchlen_match2_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B JB match_nolit_end_encodeBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match1_match_nolit_encodeBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm12B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm12B JB three_bytes_match_emit_encodeBetterBlockAsm12B three_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, 
(AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU 
X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, 
(AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + 
MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B long_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ 
$0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B - CMPL 
BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B three_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP 
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ 
-32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) 
+ MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ 
(BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm12B @@ -8848,26 +8856,26 @@ emit_remainder_ok_encodeBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeBetterBlockAsm12B three_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, 
(CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B two_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8883,73 +8891,73 @@ memmove_emit_remainder_encodeBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B memmove_long_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -8963,756 +8971,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU 
-32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 - JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX 
search_loop_encodeBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm10B no_short_found_encodeBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm10B: - MOVL 
12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm10B match_extend_back_loop_encodeBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm10B JMP match_extend_back_loop_encodeBetterBlockAsm10B match_extend_back_end_encodeBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 
8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match8_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match4_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B JB match_nolit_end_encodeBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match1_match_nolit_encodeBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI 
+ CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm10B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm10B JB three_bytes_match_emit_encodeBetterBlockAsm10B three_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm10B two_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm10B JMP memmove_long_match_emit_encodeBetterBlockAsm10B one_byte_match_emit_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL 
(R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B memmove_long_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + 
SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL 
R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 
long_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 
two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B emit_copy_three_match_nolit_encodeBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B match_is_repeat_encodeBetterBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B three_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm10B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B 
one_byte_match_emit_repeat_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, 
R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 
1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ 
$0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm10B @@ -9721,26 +9730,26 @@ emit_remainder_ok_encodeBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeBetterBlockAsm10B three_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B two_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B one_byte_emit_remainder_encodeBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -9756,73 +9765,73 @@ memmove_emit_remainder_encodeBetterBlockAsm10B: JMP 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) 
memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B memmove_long_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -9836,742 +9845,743 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, 
SSE2 -TEXT ·encodeBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + 
MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm8B no_short_found_encodeBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm8B match_extend_back_loop_encodeBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm8B JMP match_extend_back_loop_encodeBetterBlockAsm8B match_extend_back_end_encodeBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET 
match_dst_size_check_encodeBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match8_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match4_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL 
R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B JB match_nolit_end_encodeBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match1_match_nolit_encodeBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm8B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm8B JB three_bytes_match_emit_encodeBetterBlockAsm8B three_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm8B two_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - 
ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm8B JMP memmove_long_match_emit_encodeBetterBlockAsm8B one_byte_match_emit_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, 
-16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B memmove_long_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ 
R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 
repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B long_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm8B -repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX +repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B emit_copy_three_match_nolit_encodeBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B match_is_repeat_encodeBetterBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB 
two_bytes_match_emit_repeat_encodeBetterBlockAsm8B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B three_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm8B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B one_byte_match_emit_repeat_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x04 + CMPQ R8, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ DI, $0x08 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R8), R9 - MOVL R9, (AX) + MOVL (R9), R10 + MOVL R10, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R8), R9 - MOVL -4(R8)(DI*1), R8 - MOVL R9, (AX) - MOVL R8, -4(AX)(DI*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (CX) + MOVL R9, -4(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R12 - SUBQ R9, R12 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R11 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R12*1), R9 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(CX)(R13*1), R14 
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R10 ADDQ $0x20, R13 - ADDQ $0x20, R9 - ADDQ $0x20, R12 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R12*1), X4 - MOVOU -16(R8)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ DI, R12 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: - LEAL -4(R11), R11 - MOVW 
$0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 
+ LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm8B @@ -10580,26 +10590,26 @@ emit_remainder_ok_encodeBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeBetterBlockAsm8B three_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B two_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, 
$0x40 JB memmove_emit_remainder_encodeBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B one_byte_emit_remainder_encodeBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -10615,73 +10625,73 @@ memmove_emit_remainder_encodeBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B memmove_long_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -10695,798 +10705,799 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) 
+ MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + 
MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + 
MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVL BX, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL SI, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA 
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - XORQ 8(BX)(R10*1), R11 + XORQ 
8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match8_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match4_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm JB repeat_extend_forward_end_encodeSnappyBlockAsm - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ 
repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match1_repeat_extend_encodeSnappyBlockAsm: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeSnappyBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE 
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm JMP 
match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm JMP memmove_long_match_emit_encodeSnappyBlockAsm 
one_byte_match_emit_encodeSnappyBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm 
memmove_long_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ 
(DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match8_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match4_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE 
matchlen_match2_match_nolit_encodeSnappyBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm JB match_nolit_end_encodeSnappyBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_match1_match_nolit_encodeSnappyBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm - 
MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm - INCL CX + INCL DX JMP 
search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm @@ -11496,41 +11507,41 @@ emit_remainder_ok_encodeSnappyBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX 
memmove_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -11546,73 +11557,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 
16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm memmove_long_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -11626,718 +11637,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func 
encodeSnappyBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL 
R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm64K - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm64K repeat_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm64K: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB 
two_bytes_repeat_emit_encodeSnappyBlockAsm64K JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K three_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K two_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm64K JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K one_byte_repeat_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, 
-16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K memmove_long_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - 
CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ 
matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K JB repeat_extend_forward_end_encodeSnappyBlockAsm64K - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm64K: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW 
DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm64K emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm64K: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm64K no_repeat_found_encodeSnappyBlockAsm64K: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm64K - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm64K - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm64K candidate3_match_encodeSnappyBlockAsm64K: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm64K candidate2_match_encodeSnappyBlockAsm64K: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ 
match_extend_back_end_encodeSnappyBlockAsm64K match_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm64K JMP match_extend_back_loop_encodeSnappyBlockAsm64K match_extend_back_end_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm64K: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm64K - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm64K JB three_bytes_match_emit_encodeSnappyBlockAsm64K three_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm64K two_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBlockAsm64K one_byte_match_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 
+ MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K memmove_long_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ 
(CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm64K: match_nolit_loop_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, 
DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE 
matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K JB match_nolit_end_encodeSnappyBlockAsm64K - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm64K - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm64K: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm64K: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBlockAsm64K: - LEAL -2(SI), 
SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm64K: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm64K - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm64K emit_remainder_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm64K @@ -12346,26 +12358,26 @@ emit_remainder_ok_encodeSnappyBlockAsm64K: 
JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K three_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K two_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K one_byte_emit_remainder_encodeSnappyBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -12381,73 +12393,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K -emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) +emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - 
MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K memmove_long_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -12461,718 +12473,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back 
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + 
LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B 
repeat_extend_back_end_encodeSnappyBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B three_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA 
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - XORQ 
8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B JB repeat_extend_forward_end_encodeSnappyBlockAsm12B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - LEAL 2(R10), R10 - 
SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm12B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL 
(DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB 
one_byte_match_emit_encodeSnappyBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm12B JB three_bytes_match_emit_encodeSnappyBlockAsm12B three_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + 
MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B memmove_long_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, 
-32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 
- XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B JB match_nolit_end_encodeSnappyBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL 
-60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - 
SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm12B @@ -13181,26 +13194,26 @@ emit_remainder_ok_encodeSnappyBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B three_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -13216,73 +13229,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP 
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B memmove_long_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU 
-32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -13296,718 +13309,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm10B: - MOVOU X0, (DX) - 
MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), 
R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B three_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB 
memmove_repeat_emit_encodeSnappyBlockAsm10B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) 
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B memmove_long_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: - 
ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x04 + CMPL 
R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B JB repeat_extend_forward_end_encodeSnappyBlockAsm10B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm10B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL 
-15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm10B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm10B no_repeat_found_encodeSnappyBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm10B candidate3_match_encodeSnappyBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm10B candidate2_match_encodeSnappyBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm10B match_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm10B JMP match_extend_back_loop_encodeSnappyBlockAsm10B match_extend_back_end_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 
3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm10B JB three_bytes_match_emit_encodeSnappyBlockAsm10B three_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm10B two_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBlockAsm10B one_byte_match_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B memmove_long_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 
- LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm10B: match_nolit_loop_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 
8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B JB match_nolit_end_encodeSnappyBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm10B 
matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, 
DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm10B emit_remainder_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm10B @@ -14016,26 +14030,26 @@ emit_remainder_ok_encodeSnappyBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B three_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B two_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B 
one_byte_emit_remainder_encodeSnappyBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14051,73 +14065,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 
16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B memmove_long_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -14131,714 +14145,715 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX 
emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ 
$0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + 
LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B three_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B one_byte_repeat_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, 
(AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU 
-16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), 
R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B JB repeat_extend_forward_end_encodeSnappyBlockAsm8B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm8B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB 
$0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm8B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm8B no_repeat_found_encodeSnappyBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm8B candidate3_match_encodeSnappyBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm8B candidate2_match_encodeSnappyBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm8B match_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE 
match_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm8B JB three_bytes_match_emit_encodeSnappyBlockAsm8B three_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - 
CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ 
AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + 
XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE 
matchlen_match1_match_nolit_encodeSnappyBlockAsm8B JB match_nolit_end_encodeSnappyBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB 
match_nolit_dst_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm8B @@ -14847,26 +14862,26 @@ emit_remainder_ok_encodeSnappyBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B three_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ 
$0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14882,73 +14897,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP 
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B memmove_long_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -14962,520 +14977,521 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - 
MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeSnappyBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeSnappyBetterBlockAsm check_maxskip_ok_encodeSnappyBetterBlockAsm: - LEAL 
1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeSnappyBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm no_short_found_encodeSnappyBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ 
match_extend_back_end_encodeSnappyBetterBlockAsm match_extend_back_loop_encodeSnappyBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm JMP match_extend_back_loop_encodeSnappyBetterBlockAsm match_extend_back_end_encodeSnappyBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + 
SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm JB match_nolit_end_encodeSnappyBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // 
Check if repeat - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeSnappyBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeSnappyBetterBlockAsm match_length_ok_encodeSnappyBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm four_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm three_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm two_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm JMP 
memmove_long_match_emit_encodeSnappyBetterBlockAsm one_byte_match_emit_encodeSnappyBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) 
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm memmove_long_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX 
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, 
R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ 
$0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm @@ -15485,41 +15501,41 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm four_bytes_emit_remainder_encodeSnappyBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm three_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP 
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm one_byte_emit_remainder_encodeSnappyBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -15535,73 +15551,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm 
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm memmove_long_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -15615,463 +15631,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_ba JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, 
-16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000900, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX 
search_loop_encodeSnappyBetterBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 262168(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 262168(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL 262144(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 262144(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm64K - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm64K no_short_found_encodeSnappyBetterBlockAsm64K: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - DECL CX - MOVL DI, BX + DECL 
DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K match_extend_back_loop_encodeSnappyBetterBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K match_extend_back_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K 
matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K JB match_nolit_end_encodeSnappyBetterBlockAsm64K - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVB (R8)(R11*1), R10 - CMPB 
(R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K three_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K one_byte_match_emit_encodeSnappyBetterBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 JMP 
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, 
R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + +emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K 
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x30, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 262168(SP)(R10*4) - MOVL R13, 262168(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x30, R10 
+ SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x33, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x30, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x33, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 262144(AX)(R11*4) + MOVL R14, 262144(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm64K: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x30, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x30, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K @@ -16080,26 +16097,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + 
ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16115,73 +16132,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP 
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -16195,463 +16212,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back 
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL 
DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm12B no_short_found_encodeSnappyBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL 
(AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B match_extend_back_loop_encodeSnappyBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B match_extend_back_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -16(DI), DI - 
LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B JB match_nolit_end_encodeSnappyBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ 
match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B one_byte_match_emit_encodeSnappyBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE 
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - 
MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + 
MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 
1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B @@ -16660,26 +16678,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B 
three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16695,73 +16713,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - 
MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) - JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B - -emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) + JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B + +emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -16775,463 
+16793,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ 
$0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm10B no_short_found_encodeSnappyBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ 
$0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B match_extend_back_loop_encodeSnappyBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B match_extend_back_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ 
matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B JB match_nolit_end_encodeSnappyBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW 
(R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B one_byte_match_emit_encodeSnappyBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE 
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU 
-32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE 
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, 
DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B @@ -17240,26 
+17259,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17275,73 +17294,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B 
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -17355,461 +17374,462 @@ 
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX 
- LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm8B no_short_found_encodeSnappyBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL 
CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B match_extend_back_loop_encodeSnappyBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B match_extend_back_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - XORQ 
8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 - JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 + JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B JB match_nolit_end_encodeSnappyBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + 
CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B three_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B two_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B one_byte_match_emit_encodeSnappyBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE 
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B memmove_long_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - 
MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B - MOVB 
$0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ 
(BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B @@ -17818,26 +17838,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, 
(AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17853,73 +17873,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, 
-8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -17933,1136 +17953,1142 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_ JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back 
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func calcBlockSize(src []byte) int +// func calcBlockSize(src []byte, tmp *[32768]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSize(SB), $32792-32 - XORQ AX, AX - MOVQ $0x00000100, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSize(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000100, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSize: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSize MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ 
src_base+0(FP), BX search_loop_calcBlockSize: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x33, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x33, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSize - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSize repeat_extend_back_loop_calcBlockSize: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSize - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSize - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSize repeat_extend_back_end_calcBlockSize: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI 
+ CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSize: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSize - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSize - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_repeat_emit_calcBlockSize four_bytes_repeat_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_repeat_emit_calcBlockSize three_bytes_repeat_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSize two_bytes_repeat_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSize JMP memmove_long_repeat_emit_calcBlockSize one_byte_repeat_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSize memmove_long_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSize: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSize: - CMPL DI, $0x10 + CMPL R8, $0x10 JB 
matchlen_match8_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSize - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSize matchlen_bsf_16repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match8_repeat_extend_calcBlockSize: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSize matchlen_bsf_8_repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match4_repeat_extend_calcBlockSize: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSize - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSize - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSize: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSize JB repeat_extend_forward_end_calcBlockSize - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 
JNE matchlen_match1_repeat_extend_calcBlockSize - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSize matchlen_match1_repeat_extend_calcBlockSize: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSize - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSize: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_calcBlockSize four_bytes_loop_back_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_calcBlockSize - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_calcBlockSize JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize four_bytes_remain_repeat_as_copy_calcBlockSize: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP repeat_end_emit_calcBlockSize two_byte_offset_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSize - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSize two_byte_offset_short_repeat_as_copy_calcBlockSize: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSize - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSize emit_copy_three_repeat_as_copy_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSize: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSize 
no_repeat_found_calcBlockSize: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSize - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSize - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSize - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSize candidate3_match_calcBlockSize: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSize candidate2_match_calcBlockSize: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSize: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSize match_extend_back_loop_calcBlockSize: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSize - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSize - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSize JMP match_extend_back_loop_calcBlockSize match_extend_back_end_calcBlockSize: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSize: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSize - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSize 
- CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSize - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB three_bytes_match_emit_calcBlockSize - CMPL SI, $0x01000000 + CMPL DI, $0x01000000 JB four_bytes_match_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_match_emit_calcBlockSize four_bytes_match_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_match_emit_calcBlockSize three_bytes_match_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSize two_bytes_match_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSize JMP memmove_long_match_emit_calcBlockSize one_byte_match_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSize memmove_long_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSize: match_nolit_loop_calcBlockSize: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSize: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSize - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSize 
matchlen_bsf_16match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match8_match_nolit_calcBlockSize: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSize matchlen_bsf_8_match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match4_match_nolit_calcBlockSize: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSize - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSize - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSize: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSize JB match_nolit_end_calcBlockSize - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSize - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSize matchlen_match1_match_nolit_calcBlockSize: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSize - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSize: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, 
$0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_calcBlockSize four_bytes_loop_back_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_calcBlockSize - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_calcBlockSize JMP four_bytes_loop_back_match_nolit_calcBlockSize four_bytes_remain_match_nolit_calcBlockSize: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_calcBlockSize two_byte_offset_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSize - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSize two_byte_offset_short_match_nolit_calcBlockSize: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSize - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSize emit_copy_three_match_nolit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSize: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSize: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x33, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x33, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ 
$0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x33, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x33, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSize - INCL CX + INCL DX JMP search_loop_calcBlockSize emit_remainder_calcBlockSize: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSize: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSize - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSize - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x00010000 + CMPL AX, $0x00010000 JB three_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x01000000 + CMPL AX, $0x01000000 JB four_bytes_emit_remainder_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_emit_remainder_calcBlockSize four_bytes_emit_remainder_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_emit_remainder_calcBlockSize three_bytes_emit_remainder_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSize two_bytes_emit_remainder_calcBlockSize: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSize JMP memmove_long_emit_remainder_calcBlockSize one_byte_emit_remainder_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ 
(CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSize memmove_long_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSize: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET -// func calcBlockSizeSmall(src []byte) int +// func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSizeSmall(SB), $2072-32 - XORQ AX, AX - MOVQ $0x00000010, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSizeSmall(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000010, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSizeSmall: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSizeSmall MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSizeSmall: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + 
SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x37, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x37, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x37, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSizeSmall - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSizeSmall repeat_extend_back_loop_calcBlockSizeSmall: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSizeSmall - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSizeSmall repeat_extend_back_end_calcBlockSizeSmall: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSizeSmall: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c 
JB one_byte_repeat_emit_calcBlockSizeSmall - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSizeSmall JB three_bytes_repeat_emit_calcBlockSizeSmall three_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSizeSmall two_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSizeSmall JMP memmove_long_repeat_emit_calcBlockSizeSmall one_byte_repeat_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSizeSmall memmove_long_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSizeSmall: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall matchlen_bsf_16repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP 
repeat_extend_forward_end_calcBlockSizeSmall matchlen_match8_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSizeSmall matchlen_bsf_8_repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match4_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSizeSmall - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSizeSmall - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSizeSmall JB repeat_extend_forward_end_calcBlockSizeSmall - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSizeSmall - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSizeSmall matchlen_match1_repeat_extend_calcBlockSizeSmall: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSizeSmall - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSizeSmall: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy 
two_byte_offset_repeat_as_copy_calcBlockSizeSmall: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall: - MOVL BX, SI - SHLL $0x02, SI - CMPL BX, $0x0c + MOVL SI, DI + SHLL $0x02, DI + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSizeSmall emit_copy_three_repeat_as_copy_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSizeSmall: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSizeSmall no_repeat_found_calcBlockSizeSmall: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSizeSmall - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSizeSmall - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSizeSmall - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSizeSmall candidate3_match_calcBlockSizeSmall: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSizeSmall candidate2_match_calcBlockSizeSmall: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSizeSmall: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSizeSmall match_extend_back_loop_calcBlockSizeSmall: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSizeSmall - LEAL -1(CX), CX - DECL BX + 
LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSizeSmall JMP match_extend_back_loop_calcBlockSizeSmall match_extend_back_end_calcBlockSizeSmall: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSizeSmall: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSizeSmall - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSizeSmall - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSizeSmall JB three_bytes_match_emit_calcBlockSizeSmall three_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSizeSmall two_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSizeSmall JMP memmove_long_match_emit_calcBlockSizeSmall one_byte_match_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSizeSmall memmove_long_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSizeSmall: match_nolit_loop_calcBlockSizeSmall: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ 
(BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall matchlen_bsf_16match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match8_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSizeSmall matchlen_bsf_8_match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match4_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSizeSmall - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSizeSmall - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x01 + CMPL DI, $0x01 JE 
matchlen_match1_match_nolit_calcBlockSizeSmall JB match_nolit_end_calcBlockSizeSmall - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSizeSmall - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSizeSmall matchlen_match1_match_nolit_calcBlockSizeSmall: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSizeSmall - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSizeSmall: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_calcBlockSizeSmall: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSizeSmall two_byte_offset_short_match_nolit_calcBlockSizeSmall: - MOVL R9, BX - SHLL $0x02, BX - CMPL R9, $0x0c + MOVL R10, SI + SHLL $0x02, SI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSizeSmall emit_copy_three_match_nolit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSizeSmall: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSizeSmall: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x37, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x37, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ 
$0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x37, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x37, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSizeSmall - INCL CX + INCL DX JMP search_loop_calcBlockSizeSmall emit_remainder_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSizeSmall - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSizeSmall JB three_bytes_emit_remainder_calcBlockSizeSmall three_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSizeSmall two_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSizeSmall JMP memmove_long_emit_remainder_calcBlockSizeSmall one_byte_emit_remainder_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSizeSmall memmove_long_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX 
emit_literal_done_emit_remainder_calcBlockSizeSmall: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET // func emitLiteral(dst []byte, lit []byte) int @@ -19783,7 +19809,7 @@ TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4_s2_loop: @@ -20266,7 +20292,7 @@ TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4s_s2_loop: @@ -20751,7 +20777,7 @@ TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4_snappy_loop: CMPQ DX, BX @@ -21017,7 +21043,7 @@ TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4s_snappy_loop: CMPQ DX, BX diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go index 0a46f2b984f..fd15078f7df 100644 --- a/vendor/github.com/klauspost/compress/s2/writer.go +++ b/vendor/github.com/klauspost/compress/s2/writer.go @@ -83,11 +83,14 @@ type Writer struct { snappy bool flushOnWrite bool appendIndex bool + bufferCB func([]byte) level uint8 } type result struct { b []byte + // return when writing + ret []byte // Uncompressed start offset startOffset int64 } @@ -146,6 +149,10 @@ func (w *Writer) Reset(writer io.Writer) { for write := range toWrite { // Wait for the data to be available. 
input := <-write + if input.ret != nil && w.bufferCB != nil { + w.bufferCB(input.ret) + input.ret = nil + } in := input.b if len(in) > 0 { if w.err(nil) == nil { @@ -341,7 +348,8 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { // but the input buffer cannot be written to by the caller // until Flush or Close has been called when concurrency != 1. // -// If you cannot control that, use the regular Write function. +// Use the WriterBufferDone to receive a callback when the buffer is done +// Processing. // // Note that input is not buffered. // This means that each write will result in discrete blocks being created. @@ -364,6 +372,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { } if w.concurrency == 1 { _, err := w.writeSync(buf) + if w.bufferCB != nil { + w.bufferCB(buf) + } return err } @@ -378,7 +389,7 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } - + orgBuf := buf for len(buf) > 0 { // Cut input. uncompressed := buf @@ -397,6 +408,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { startOffset: w.uncompWritten, } w.uncompWritten += int64(len(uncompressed)) + if len(buf) == 0 && w.bufferCB != nil { + res.ret = orgBuf + } go func() { race.ReadSlice(uncompressed) @@ -922,7 +936,7 @@ func WriterBetterCompression() WriterOption { } // WriterBestCompression will enable better compression. -// EncodeBetter compresses better than Encode but typically with a +// EncodeBest compresses better than Encode but typically with a // big speed decrease on compression. func WriterBestCompression() WriterOption { return func(w *Writer) error { @@ -941,6 +955,17 @@ func WriterUncompressed() WriterOption { } } +// WriterBufferDone will perform a callback when EncodeBuffer has finished +// writing a buffer to the output and the buffer can safely be reused. +// If the buffer was split into several blocks, it will be sent after the last block. 
+// Callbacks will not be done concurrently. +func WriterBufferDone(fn func(b []byte)) WriterOption { + return func(w *Writer) error { + w.bufferCB = fn + return nil + } +} + // WriterBlockSize allows to override the default block size. // Blocks will be this size or smaller. // Minimum size is 4KB and maximum size is 4MB. diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 03744fbc765..9c28840c3bd 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -598,7 +598,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { printf("RLE set to 0x%x, code: %v", symb, v) } case compModeFSE: - println("Reading table for", tableIndex(i)) + if debugDecoder { + println("Reading table for", tableIndex(i)) + } if seq.fse == nil || seq.fse.preDefined { seq.fse = fseDecoderPool.Get().(*fseDecoder) } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index a4f5bf91fc6..84a79fde767 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -179,9 +179,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. 
@@ -210,12 +210,12 @@ encodeLoop: // Index match start+1 (long) -> s - 1 index0 := s + repOff - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -241,9 +241,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -270,11 +270,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -708,9 +708,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -738,12 +738,12 @@ encodeLoop: blk.sequences = append(blk.sequences, seq) // Index match start+1 (long) -> s - 1 - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -772,9 +772,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. 
var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -801,11 +801,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index a154c18f741..d36be7bd8c2 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -138,9 +138,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -166,11 +166,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -798,9 +798,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. 
@@ -826,11 +826,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 72af7ef0fe0..8f8223cd3a6 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -6,6 +6,7 @@ package zstd import ( "crypto/rand" + "errors" "fmt" "io" "math" @@ -149,6 +150,9 @@ func (e *Encoder) ResetContentSize(w io.Writer, size int64) { // and write CRC if requested. func (e *Encoder) Write(p []byte) (n int, err error) { s := &e.state + if s.eofWritten { + return 0, ErrEncoderClosed + } for len(p) > 0 { if len(p)+len(s.filling) < e.o.blockSize { if e.o.crc { @@ -202,7 +206,7 @@ func (e *Encoder) nextBlock(final bool) error { return nil } if final && len(s.filling) > 0 { - s.current = e.EncodeAll(s.filling, s.current[:0]) + s.current = e.encodeAll(s.encoder, s.filling, s.current[:0]) var n2 int n2, s.err = s.w.Write(s.current) if s.err != nil { @@ -288,6 +292,9 @@ func (e *Encoder) nextBlock(final bool) error { s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current s.nInput += int64(len(s.current)) s.wg.Add(1) + if final { + s.eofWritten = true + } go func(src []byte) { if debugEncoder { println("Adding block,", len(src), "bytes, final:", final) @@ -303,9 +310,6 @@ func (e *Encoder) nextBlock(final bool) error { blk := enc.Block() enc.Encode(blk, src) blk.last = final - if final { - s.eofWritten = true - } // Wait for pending writes. s.wWg.Wait() if s.writeErr != nil { @@ -401,12 +405,20 @@ func (e *Encoder) Flush() error { if len(s.filling) > 0 { err := e.nextBlock(false) if err != nil { + // Ignore Flush after Close. 
+ if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } } s.wg.Wait() s.wWg.Wait() if s.err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return s.err } return s.writeErr @@ -422,6 +434,9 @@ func (e *Encoder) Close() error { } err := e.nextBlock(true) if err != nil { + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } if s.frameContentSize > 0 { @@ -459,6 +474,11 @@ func (e *Encoder) Close() error { } _, s.err = s.w.Write(frame) } + if s.err == nil { + s.err = ErrEncoderClosed + return nil + } + return s.err } @@ -469,6 +489,15 @@ func (e *Encoder) Close() error { // Data compressed with EncodeAll can be decoded with the Decoder, // using either a stream or DecodeAll. func (e *Encoder) EncodeAll(src, dst []byte) []byte { + e.init.Do(e.initialize) + enc := <-e.encoders + defer func() { + e.encoders <- enc + }() + return e.encodeAll(enc, src, dst) +} + +func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte { if len(src) == 0 { if e.o.fullZero { // Add frame header. @@ -491,13 +520,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(e.initialize) - enc := <-e.encoders - defer func() { - // Release encoder reference to last block. - // If a non-single block is needed the encoder will reset again. - e.encoders <- enc - }() + // Use single segments when above minimum window and below window size. 
single := len(src) <= e.o.windowSize && len(src) > MinWindowSize if e.o.single != nil { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 53e160f7e5a..e47af66e7c9 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -146,7 +146,9 @@ func (d *frameDec) reset(br byteBuffer) error { } return err } - printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + if debugDecoder { + printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + } windowLog := 10 + (wd >> 3) windowBase := uint64(1) << windowLog windowAdd := (windowBase / 8) * uint64(wd&0x7) diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go index 8adabd82877..c59f17e07ad 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -146,7 +146,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) default: - return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode) } s.seqSize += ctx.litRemain @@ -292,7 +292,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { return io.ErrUnexpectedEOF } - return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode) } if ctx.litRemain < 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index 5b06174b898..f5591fa1e86 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -1814,7 
+1814,7 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -2376,7 +2376,7 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition @@ -2896,7 +2896,7 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -3560,7 +3560,7 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 4be7cc73671..066bef2a4f0 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -88,6 +88,10 @@ var ( // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + // ErrEncoderClosed will be returned if the Encoder was used after + // Close has been called. + ErrEncoderClosed = errors.New("encoder used after Close") + // ErrDecoderNilInput is returned when a nil Reader was provided // and an operation other than Reset/DecodeAll/Close was attempted. 
ErrDecoderNilInput = errors.New("nil input provided as reader") diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_table.go b/vendor/github.com/mattn/go-runewidth/runewidth_table.go index e5d890c266f..ad025ad5296 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth_table.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth_table.go @@ -4,20 +4,21 @@ package runewidth var combining = table{ {0x0300, 0x036F}, {0x0483, 0x0489}, {0x07EB, 0x07F3}, - {0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0D00, 0x0D01}, - {0x135D, 0x135F}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0}, - {0x1B6B, 0x1B73}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, + {0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0CF3, 0x0CF3}, + {0x0D00, 0x0D01}, {0x135D, 0x135F}, {0x1A7F, 0x1A7F}, + {0x1AB0, 0x1ACE}, {0x1B6B, 0x1B73}, {0x1DC0, 0x1DFF}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2DE0, 0x2DFF}, {0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA8E0, 0xA8F1}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x10376, 0x1037A}, - {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11300, 0x11301}, - {0x1133B, 0x1133C}, {0x11366, 0x1136C}, {0x11370, 0x11374}, - {0x16AF0, 0x16AF4}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, + {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x10F82, 0x10F85}, + {0x11300, 0x11301}, {0x1133B, 0x1133C}, {0x11366, 0x1136C}, + {0x11370, 0x11374}, {0x16AF0, 0x16AF4}, {0x1CF00, 0x1CF2D}, + {0x1CF30, 0x1CF46}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, - {0x1E8D0, 0x1E8D6}, + {0x1E08F, 0x1E08F}, {0x1E8D0, 0x1E8D6}, } var doublewidth = table{ @@ -33,33 +34,34 @@ var doublewidth = table{ {0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797}, {0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C}, {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99}, - {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, - 
{0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF}, - {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E3}, - {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x4DBF}, - {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, - {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, - {0xFE30, 0xFE52}, {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, - {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, - {0x16FF0, 0x16FF1}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, - {0x18D00, 0x18D08}, {0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, - {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1F004, 0x1F004}, - {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A}, - {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, - {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F320}, - {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, {0x1F37E, 0x1F393}, - {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0}, - {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, - {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, - {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, {0x1F595, 0x1F596}, - {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5}, - {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D7}, - {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB}, - {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F978}, - {0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA74}, - {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8}, - {0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6}, - {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, + {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x303E}, + {0x3041, 0x3096}, {0x3099, 0x30FF}, {0x3105, 0x312F}, + {0x3131, 0x318E}, {0x3190, 0x31E3}, {0x31EF, 0x321E}, + {0x3220, 0x3247}, {0x3250, 0x4DBF}, {0x4E00, 0xA48C}, + {0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3}, + {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, + {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, + {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, {0x16FF0, 
0x16FF1}, + {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, + {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, + {0x1B000, 0x1B122}, {0x1B132, 0x1B132}, {0x1B150, 0x1B152}, + {0x1B155, 0x1B155}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, + {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, + {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, + {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, + {0x1F300, 0x1F320}, {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, + {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, + {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, + {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, + {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, + {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, + {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, + {0x1F6D5, 0x1F6D7}, {0x1F6DC, 0x1F6DF}, {0x1F6EB, 0x1F6EC}, + {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0}, + {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F9FF}, + {0x1FA70, 0x1FA7C}, {0x1FA80, 0x1FA88}, {0x1FA90, 0x1FABD}, + {0x1FABF, 0x1FAC5}, {0x1FACE, 0x1FADB}, {0x1FAE0, 0x1FAE8}, + {0x1FAF0, 0x1FAF8}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, } var ambiguous = table{ @@ -154,43 +156,43 @@ var neutral = table{ {0x0402, 0x040F}, {0x0450, 0x0450}, {0x0452, 0x052F}, {0x0531, 0x0556}, {0x0559, 0x058A}, {0x058D, 0x058F}, {0x0591, 0x05C7}, {0x05D0, 0x05EA}, {0x05EF, 0x05F4}, - {0x0600, 0x061C}, {0x061E, 0x070D}, {0x070F, 0x074A}, - {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D}, - {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E}, - {0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08C7}, - {0x08D3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990}, - {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, - {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8}, - {0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD}, - {0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03}, - {0x0A05, 
0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, - {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, - {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42}, - {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51}, - {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76}, - {0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91}, - {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, - {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9}, - {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3}, - {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03}, - {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, - {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, - {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, - {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63}, - {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A}, - {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, - {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, - {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2}, - {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0}, - {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C}, - {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, - {0x0C3D, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, - {0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C60, 0x0C63}, + {0x0600, 0x070D}, {0x070F, 0x074A}, {0x074D, 0x07B1}, + {0x07C0, 0x07FA}, {0x07FD, 0x082D}, {0x0830, 0x083E}, + {0x0840, 0x085B}, {0x085E, 0x085E}, {0x0860, 0x086A}, + {0x0870, 0x088E}, {0x0890, 0x0891}, {0x0898, 0x0983}, + {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8}, + {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x09B6, 0x09B9}, + {0x09BC, 0x09C4}, {0x09C7, 0x09C8}, {0x09CB, 0x09CE}, + {0x09D7, 0x09D7}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3}, + {0x09E6, 0x09FE}, {0x0A01, 0x0A03}, {0x0A05, 0x0A0A}, + {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, + {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, + {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, + {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51}, {0x0A59, 
0x0A5C}, + {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76}, {0x0A81, 0x0A83}, + {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, + {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, + {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, + {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3}, {0x0AE6, 0x0AF1}, + {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C}, + {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, + {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, {0x0B3C, 0x0B44}, + {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B55, 0x0B57}, + {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63}, {0x0B66, 0x0B77}, + {0x0B82, 0x0B83}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, + {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C}, + {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, + {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, + {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0}, {0x0BD7, 0x0BD7}, + {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C}, {0x0C0E, 0x0C10}, + {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, {0x0C3C, 0x0C44}, + {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, + {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D}, {0x0C60, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C77, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, - {0x0CD5, 0x0CD6}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE3}, - {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D00, 0x0D0C}, + {0x0CD5, 0x0CD6}, {0x0CDD, 0x0CDE}, {0x0CE0, 0x0CE3}, + {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF3}, {0x0D00, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63}, {0x0D66, 0x0D7F}, {0x0D81, 0x0D83}, {0x0D85, 0x0D96}, {0x0D9A, 0x0DB1}, @@ -200,7 +202,7 @@ var neutral = table{ {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD}, {0x0EC0, 0x0EC4}, - {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9}, + {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECE}, {0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47}, {0x0F49, 0x0F6C}, {0x0F71, 
0x0F97}, {0x0F99, 0x0FBC}, {0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5}, {0x10C7, 0x10C7}, @@ -212,20 +214,19 @@ var neutral = table{ {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, {0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8}, - {0x1700, 0x170C}, {0x170E, 0x1714}, {0x1720, 0x1736}, - {0x1740, 0x1753}, {0x1760, 0x176C}, {0x176E, 0x1770}, - {0x1772, 0x1773}, {0x1780, 0x17DD}, {0x17E0, 0x17E9}, - {0x17F0, 0x17F9}, {0x1800, 0x180E}, {0x1810, 0x1819}, - {0x1820, 0x1878}, {0x1880, 0x18AA}, {0x18B0, 0x18F5}, - {0x1900, 0x191E}, {0x1920, 0x192B}, {0x1930, 0x193B}, - {0x1940, 0x1940}, {0x1944, 0x196D}, {0x1970, 0x1974}, - {0x1980, 0x19AB}, {0x19B0, 0x19C9}, {0x19D0, 0x19DA}, - {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C}, - {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD}, - {0x1AB0, 0x1AC0}, {0x1B00, 0x1B4B}, {0x1B50, 0x1B7C}, - {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49}, - {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7}, - {0x1CD0, 0x1CFA}, {0x1D00, 0x1DF9}, {0x1DFB, 0x1F15}, + {0x1700, 0x1715}, {0x171F, 0x1736}, {0x1740, 0x1753}, + {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1772, 0x1773}, + {0x1780, 0x17DD}, {0x17E0, 0x17E9}, {0x17F0, 0x17F9}, + {0x1800, 0x1819}, {0x1820, 0x1878}, {0x1880, 0x18AA}, + {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1920, 0x192B}, + {0x1930, 0x193B}, {0x1940, 0x1940}, {0x1944, 0x196D}, + {0x1970, 0x1974}, {0x1980, 0x19AB}, {0x19B0, 0x19C9}, + {0x19D0, 0x19DA}, {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, + {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, + {0x1AA0, 0x1AAD}, {0x1AB0, 0x1ACE}, {0x1B00, 0x1B4C}, + {0x1B50, 0x1B7E}, {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, + {0x1C3B, 0x1C49}, {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA}, + {0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA}, {0x1D00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, @@ -237,7 +238,7 @@ 
var neutral = table{ {0x2036, 0x203A}, {0x203C, 0x203D}, {0x203F, 0x2064}, {0x2066, 0x2071}, {0x2075, 0x207E}, {0x2080, 0x2080}, {0x2085, 0x208E}, {0x2090, 0x209C}, {0x20A0, 0x20A8}, - {0x20AA, 0x20AB}, {0x20AD, 0x20BF}, {0x20D0, 0x20F0}, + {0x20AA, 0x20AB}, {0x20AD, 0x20C0}, {0x20D0, 0x20F0}, {0x2100, 0x2102}, {0x2104, 0x2104}, {0x2106, 0x2108}, {0x210A, 0x2112}, {0x2114, 0x2115}, {0x2117, 0x2120}, {0x2123, 0x2125}, {0x2127, 0x212A}, {0x212C, 0x2152}, @@ -275,15 +276,15 @@ var neutral = table{ {0x2780, 0x2794}, {0x2798, 0x27AF}, {0x27B1, 0x27BE}, {0x27C0, 0x27E5}, {0x27EE, 0x2984}, {0x2987, 0x2B1A}, {0x2B1D, 0x2B4F}, {0x2B51, 0x2B54}, {0x2B5A, 0x2B73}, - {0x2B76, 0x2B95}, {0x2B97, 0x2C2E}, {0x2C30, 0x2C5E}, - {0x2C60, 0x2CF3}, {0x2CF9, 0x2D25}, {0x2D27, 0x2D27}, - {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D70}, - {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, - {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, - {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, - {0x2DE0, 0x2E52}, {0x303F, 0x303F}, {0x4DC0, 0x4DFF}, - {0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, {0xA700, 0xA7BF}, - {0xA7C2, 0xA7CA}, {0xA7F5, 0xA82C}, {0xA830, 0xA839}, + {0x2B76, 0x2B95}, {0x2B97, 0x2CF3}, {0x2CF9, 0x2D25}, + {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, + {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6}, + {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, + {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, + {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E5D}, {0x303F, 0x303F}, + {0x4DC0, 0x4DFF}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, + {0xA700, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D3, 0xA7D3}, + {0xA7D5, 0xA7D9}, {0xA7F2, 0xA82C}, {0xA830, 0xA839}, {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9}, {0xA8E0, 0xA953}, {0xA95F, 0xA95F}, {0xA980, 0xA9CD}, {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36}, @@ -294,8 +295,8 @@ var neutral = table{ {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xD800, 0xDFFF}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB36}, {0xFB38, 0xFB3C}, 
{0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, - {0xFB43, 0xFB44}, {0xFB46, 0xFBC1}, {0xFBD3, 0xFD3F}, - {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFD}, + {0xFB43, 0xFB44}, {0xFB46, 0xFBC2}, {0xFBD3, 0xFD8F}, + {0xFD92, 0xFDC7}, {0xFDCF, 0xFDCF}, {0xFDF0, 0xFDFF}, {0xFE20, 0xFE2F}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D}, @@ -307,44 +308,48 @@ var neutral = table{ {0x10380, 0x1039D}, {0x1039F, 0x103C3}, {0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9}, {0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, - {0x1056F, 0x1056F}, {0x10600, 0x10736}, {0x10740, 0x10755}, - {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, - {0x1080A, 0x10835}, {0x10837, 0x10838}, {0x1083C, 0x1083C}, - {0x1083F, 0x10855}, {0x10857, 0x1089E}, {0x108A7, 0x108AF}, - {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x108FB, 0x1091B}, - {0x1091F, 0x10939}, {0x1093F, 0x1093F}, {0x10980, 0x109B7}, - {0x109BC, 0x109CF}, {0x109D2, 0x10A03}, {0x10A05, 0x10A06}, - {0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, - {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48}, {0x10A50, 0x10A58}, - {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6}, - {0x10B00, 0x10B35}, {0x10B39, 0x10B55}, {0x10B58, 0x10B72}, - {0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF}, - {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, - {0x10CFA, 0x10D27}, {0x10D30, 0x10D39}, {0x10E60, 0x10E7E}, - {0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1}, - {0x10F00, 0x10F27}, {0x10F30, 0x10F59}, {0x10FB0, 0x10FCB}, - {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x1106F}, - {0x1107F, 0x110C1}, {0x110CD, 0x110CD}, {0x110D0, 0x110E8}, - {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147}, - {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4}, - {0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11280, 0x11286}, - {0x11288, 0x11288}, {0x1128A, 0x1128D}, 
{0x1128F, 0x1129D}, - {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9}, - {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, - {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, - {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348}, - {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357}, - {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, - {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7}, - {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD}, - {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C}, - {0x11680, 0x116B8}, {0x116C0, 0x116C9}, {0x11700, 0x1171A}, - {0x1171D, 0x1172B}, {0x11730, 0x1173F}, {0x11800, 0x1183B}, - {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909}, - {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935}, - {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959}, - {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4}, - {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AC0, 0x11AF8}, + {0x1056F, 0x1057A}, {0x1057C, 0x1058A}, {0x1058C, 0x10592}, + {0x10594, 0x10595}, {0x10597, 0x105A1}, {0x105A3, 0x105B1}, + {0x105B3, 0x105B9}, {0x105BB, 0x105BC}, {0x10600, 0x10736}, + {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10780, 0x10785}, + {0x10787, 0x107B0}, {0x107B2, 0x107BA}, {0x10800, 0x10805}, + {0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838}, + {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10857, 0x1089E}, + {0x108A7, 0x108AF}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, + {0x108FB, 0x1091B}, {0x1091F, 0x10939}, {0x1093F, 0x1093F}, + {0x10980, 0x109B7}, {0x109BC, 0x109CF}, {0x109D2, 0x10A03}, + {0x10A05, 0x10A06}, {0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, + {0x10A19, 0x10A35}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48}, + {0x10A50, 0x10A58}, {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, + {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35}, {0x10B39, 0x10B55}, + {0x10B58, 0x10B72}, {0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, + {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48}, {0x10C80, 
0x10CB2}, + {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27}, {0x10D30, 0x10D39}, + {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD}, + {0x10EB0, 0x10EB1}, {0x10EFD, 0x10F27}, {0x10F30, 0x10F59}, + {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB}, {0x10FE0, 0x10FF6}, + {0x11000, 0x1104D}, {0x11052, 0x11075}, {0x1107F, 0x110C2}, + {0x110CD, 0x110CD}, {0x110D0, 0x110E8}, {0x110F0, 0x110F9}, + {0x11100, 0x11134}, {0x11136, 0x11147}, {0x11150, 0x11176}, + {0x11180, 0x111DF}, {0x111E1, 0x111F4}, {0x11200, 0x11211}, + {0x11213, 0x11241}, {0x11280, 0x11286}, {0x11288, 0x11288}, + {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A9}, + {0x112B0, 0x112EA}, {0x112F0, 0x112F9}, {0x11300, 0x11303}, + {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328}, + {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339}, + {0x1133B, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D}, + {0x11350, 0x11350}, {0x11357, 0x11357}, {0x1135D, 0x11363}, + {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11400, 0x1145B}, + {0x1145D, 0x11461}, {0x11480, 0x114C7}, {0x114D0, 0x114D9}, + {0x11580, 0x115B5}, {0x115B8, 0x115DD}, {0x11600, 0x11644}, + {0x11650, 0x11659}, {0x11660, 0x1166C}, {0x11680, 0x116B9}, + {0x116C0, 0x116C9}, {0x11700, 0x1171A}, {0x1171D, 0x1172B}, + {0x11730, 0x11746}, {0x11800, 0x1183B}, {0x118A0, 0x118F2}, + {0x118FF, 0x11906}, {0x11909, 0x11909}, {0x1190C, 0x11913}, + {0x11915, 0x11916}, {0x11918, 0x11935}, {0x11937, 0x11938}, + {0x1193B, 0x11946}, {0x11950, 0x11959}, {0x119A0, 0x119A7}, + {0x119AA, 0x119D7}, {0x119DA, 0x119E4}, {0x11A00, 0x11A47}, + {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8}, {0x11B00, 0x11B09}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09}, @@ -352,30 +357,36 @@ var neutral = table{ {0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D98}, {0x11DA0, 
0x11DA9}, {0x11EE0, 0x11EF8}, + {0x11F00, 0x11F10}, {0x11F12, 0x11F3A}, {0x11F3E, 0x11F59}, {0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399}, {0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543}, - {0x13000, 0x1342E}, {0x13430, 0x13438}, {0x14400, 0x14646}, + {0x12F90, 0x12FF2}, {0x13000, 0x13455}, {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69}, - {0x16A6E, 0x16A6F}, {0x16AD0, 0x16AED}, {0x16AF0, 0x16AF5}, - {0x16B00, 0x16B45}, {0x16B50, 0x16B59}, {0x16B5B, 0x16B61}, - {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, {0x16E40, 0x16E9A}, - {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, {0x16F8F, 0x16F9F}, - {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, - {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BCA3}, {0x1D000, 0x1D0F5}, - {0x1D100, 0x1D126}, {0x1D129, 0x1D1E8}, {0x1D200, 0x1D245}, - {0x1D2E0, 0x1D2F3}, {0x1D300, 0x1D356}, {0x1D360, 0x1D378}, - {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, - {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, - {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, - {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, - {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, - {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, - {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B}, - {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, - {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, - {0x1E026, 0x1E02A}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D}, - {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E2C0, 0x1E2F9}, - {0x1E2FF, 0x1E2FF}, {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, + {0x16A6E, 0x16ABE}, {0x16AC0, 0x16AC9}, {0x16AD0, 0x16AED}, + {0x16AF0, 0x16AF5}, {0x16B00, 0x16B45}, {0x16B50, 0x16B59}, + {0x16B5B, 0x16B61}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, + {0x16E40, 0x16E9A}, {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, + {0x16F8F, 0x16F9F}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, + {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1BC9C, 
0x1BCA3}, + {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46}, {0x1CF50, 0x1CFC3}, + {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D1EA}, + {0x1D200, 0x1D245}, {0x1D2C0, 0x1D2D3}, {0x1D2E0, 0x1D2F3}, + {0x1D300, 0x1D356}, {0x1D360, 0x1D378}, {0x1D400, 0x1D454}, + {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, + {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, + {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, + {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C}, + {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, + {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, + {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B}, {0x1DA9B, 0x1DA9F}, + {0x1DAA1, 0x1DAAF}, {0x1DF00, 0x1DF1E}, {0x1DF25, 0x1DF2A}, + {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, + {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E030, 0x1E06D}, + {0x1E08F, 0x1E08F}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D}, + {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E290, 0x1E2AE}, + {0x1E2C0, 0x1E2F9}, {0x1E2FF, 0x1E2FF}, {0x1E4D0, 0x1E4F9}, + {0x1E7E0, 0x1E7E6}, {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, + {0x1E7F0, 0x1E7FE}, {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, {0x1E900, 0x1E94B}, {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F}, {0x1EC71, 0x1ECB4}, {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, @@ -400,8 +411,8 @@ var neutral = table{ {0x1F54F, 0x1F54F}, {0x1F568, 0x1F579}, {0x1F57B, 0x1F594}, {0x1F597, 0x1F5A3}, {0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F}, {0x1F6C6, 0x1F6CB}, {0x1F6CD, 0x1F6CF}, {0x1F6D3, 0x1F6D4}, - {0x1F6E0, 0x1F6EA}, {0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773}, - {0x1F780, 0x1F7D8}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, + {0x1F6E0, 0x1F6EA}, {0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F776}, + {0x1F77B, 0x1F7D9}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F90B}, {0x1F93B, 0x1F93B}, {0x1F946, 0x1F946}, {0x1FA00, 0x1FA53}, 
{0x1FA60, 0x1FA6D}, diff --git a/vendor/github.com/parquet-go/parquet-go/allocator.go b/vendor/github.com/parquet-go/parquet-go/allocator.go index 0cf2df03197..693ee5a2471 100644 --- a/vendor/github.com/parquet-go/parquet-go/allocator.go +++ b/vendor/github.com/parquet-go/parquet-go/allocator.go @@ -1,6 +1,10 @@ package parquet -import "github.com/parquet-go/parquet-go/internal/unsafecast" +import ( + "unsafe" + + "github.com/parquet-go/parquet-go/internal/unsafecast" +) type allocator struct{ buffer []byte } @@ -31,7 +35,7 @@ func (a *allocator) copyBytes(v []byte) []byte { func (a *allocator) copyString(v string) string { b := a.makeBytes(len(v)) copy(b, v) - return unsafecast.BytesToString(b) + return unsafecast.String(b) } func (a *allocator) reset() { @@ -54,7 +58,7 @@ func (a *rowAllocator) capture(row Row) { for i, v := range row { switch v.Kind() { case ByteArray, FixedLenByteArray: - row[i].ptr = unsafecast.AddressOfBytes(a.copyBytes(v.byteArray())) + row[i].ptr = unsafe.SliceData(a.copyBytes(v.byteArray())) } } } diff --git a/vendor/github.com/parquet-go/parquet-go/array.go b/vendor/github.com/parquet-go/parquet-go/array.go index 48df9715495..774e6f85ff0 100644 --- a/vendor/github.com/parquet-go/parquet-go/array.go +++ b/vendor/github.com/parquet-go/parquet-go/array.go @@ -3,23 +3,22 @@ package parquet import ( "unsafe" - "github.com/parquet-go/parquet-go/internal/unsafecast" "github.com/parquet-go/parquet-go/sparse" ) func makeArrayValue(values []Value, offset uintptr) sparse.Array { - ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values)) + ptr := sliceData(values) return sparse.UnsafeArray(unsafe.Add(ptr, offset), len(values), unsafe.Sizeof(Value{})) } func makeArrayString(values []string) sparse.Array { str := "" - ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values)) + ptr := sliceData(values) return sparse.UnsafeArray(ptr, len(values), unsafe.Sizeof(str)) } func makeArrayBE128(values []*[16]byte) sparse.Array { - ptr := 
*(*unsafe.Pointer)(unsafe.Pointer(&values)) + ptr := sliceData(values) return sparse.UnsafeArray(ptr, len(values), unsafe.Sizeof((*[16]byte)(nil))) } @@ -29,7 +28,7 @@ func makeArray(base unsafe.Pointer, length int, offset uintptr) sparse.Array { func makeArrayOf[T any](s []T) sparse.Array { var model T - return makeArray(unsafecast.PointerOf(s), len(s), unsafe.Sizeof(model)) + return makeArray(sliceData(s), len(s), unsafe.Sizeof(model)) } func makeSlice[T any](a sparse.Array) []T { @@ -40,6 +39,10 @@ func slice[T any](p unsafe.Pointer, n int) []T { return unsafe.Slice((*T)(p), n) } +func sliceData[T any](s []T) unsafe.Pointer { + return unsafe.Pointer(unsafe.SliceData(s)) +} + type sliceHeader struct { base unsafe.Pointer len int diff --git a/vendor/github.com/parquet-go/parquet-go/bloom.go b/vendor/github.com/parquet-go/parquet-go/bloom.go index 734ac00a9d8..69d54c71712 100644 --- a/vendor/github.com/parquet-go/parquet-go/bloom.go +++ b/vendor/github.com/parquet-go/parquet-go/bloom.go @@ -1,6 +1,7 @@ package parquet import ( + "encoding/binary" "io" "github.com/parquet-go/parquet-go/bloom" @@ -9,6 +10,7 @@ import ( "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" "github.com/parquet-go/parquet-go/internal/unsafecast" + "golang.org/x/sys/cpu" ) // BloomFilter is an interface allowing applications to test whether a key @@ -162,27 +164,38 @@ func (splitBlockEncoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) } func (splitBlockEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { - splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Int32ToUint32(src)) + splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint32](src)) return dst, nil } func (splitBlockEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { - splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Int64ToUint64(src)) + splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), 
unsafecast.Slice[uint64](src)) return dst, nil } func (e splitBlockEncoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { - splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), deprecated.Int96ToBytes(src), 12) + if cpu.IsBigEndian { + srcLen := len(src) + buf := make([]byte, srcLen*12) + for idx := range srcLen { + binary.LittleEndian.PutUint32(buf[(idx*12):4+(idx*12)], uint32(src[idx][0])) + binary.LittleEndian.PutUint32(buf[4+(idx*12):8+(idx*12)], uint32(src[idx][1])) + binary.LittleEndian.PutUint32(buf[8+(idx*12):12+(idx*12)], uint32(src[idx][2])) + } + splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), buf, 12) + } else { + splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[byte](src), 12) + } return dst, nil } func (splitBlockEncoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { - splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Float32ToUint32(src)) + splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint32](src)) return dst, nil } func (splitBlockEncoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { - splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Float64ToUint64(src)) + splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint64](src)) return dst, nil } @@ -210,7 +223,7 @@ func (splitBlockEncoding) EncodeByteArray(dst []byte, src []byte, offsets []uint func (splitBlockEncoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) { filter := bloom.MakeSplitBlockFilter(dst) if size == 16 { - splitBlockEncodeUint128(filter, unsafecast.BytesToUint128(src)) + splitBlockEncodeUint128(filter, unsafecast.Slice[[16]byte](src)) } else { splitBlockEncodeFixedLenByteArray(filter, src, size) } diff --git a/vendor/github.com/parquet-go/parquet-go/bloom/filter.go b/vendor/github.com/parquet-go/parquet-go/bloom/filter.go index 655d815a460..11cc255a1c1 100644 --- 
a/vendor/github.com/parquet-go/parquet-go/bloom/filter.go +++ b/vendor/github.com/parquet-go/parquet-go/bloom/filter.go @@ -3,7 +3,8 @@ package bloom import ( "io" "sync" - "unsafe" + + "github.com/parquet-go/parquet-go/internal/unsafecast" ) // Filter is an interface representing read-only bloom filters where programs @@ -21,9 +22,7 @@ type SplitBlockFilter []Block // MakeSplitBlockFilter constructs a SplitBlockFilter value from the data byte // slice. func MakeSplitBlockFilter(data []byte) SplitBlockFilter { - p := *(*unsafe.Pointer)(unsafe.Pointer(&data)) - n := len(data) / BlockSize - return unsafe.Slice((*Block)(p), n) + return unsafecast.Slice[Block](data) } // NumSplitBlocksOf returns the number of blocks in a filter intended to hold @@ -64,7 +63,7 @@ func (f SplitBlockFilter) Check(x uint64) bool { return filterCheck(f, x) } // The returned slice shares the memory of f. The method is intended to be used // to serialize the bloom filter to a storage medium. func (f SplitBlockFilter) Bytes() []byte { - return unsafe.Slice(*(**byte)(unsafe.Pointer(&f)), len(f)*BlockSize) + return unsafecast.Slice[byte](f) } // CheckSplitBlock is similar to bloom.SplitBlockFilter.Check but reads the diff --git a/vendor/github.com/parquet-go/parquet-go/column.go b/vendor/github.com/parquet-go/parquet-go/column.go index 0320a6df6c4..51f2d20a097 100644 --- a/vendor/github.com/parquet-go/parquet-go/column.go +++ b/vendor/github.com/parquet-go/parquet-go/column.go @@ -352,6 +352,8 @@ func (cl *columnLoader) open(file *File, path []string) (*Column, error) { c.typ = &groupType{} if lt := c.schema.LogicalType; lt != nil && lt.Map != nil { c.typ = &mapType{} + } else if lt != nil && lt.List != nil { + c.typ = &listType{} } c.columns = make([]*Column, numChildren) @@ -691,7 +693,7 @@ func (c *Column) decodeDataPage(header DataPageHeader, numValues int, repetition if pageType.Kind() == ByteArray && !isDictionaryEncoding(pageEncoding) { obuf = buffers.get(4 * (numValues + 1)) defer 
obuf.unref() - pageOffsets = unsafecast.BytesToUint32(obuf.data) + pageOffsets = unsafecast.Slice[uint32](obuf.data) } values := pageType.NewValues(pageValues, pageOffsets) diff --git a/vendor/github.com/parquet-go/parquet-go/column_buffer.go b/vendor/github.com/parquet-go/parquet-go/column_buffer.go index bc40cc3e17f..8435cb2977c 100644 --- a/vendor/github.com/parquet-go/parquet-go/column_buffer.go +++ b/vendor/github.com/parquet-go/parquet-go/column_buffer.go @@ -16,8 +16,12 @@ import ( "github.com/parquet-go/parquet-go/internal/bitpack" "github.com/parquet-go/parquet-go/internal/unsafecast" "github.com/parquet-go/parquet-go/sparse" + "golang.org/x/sys/cpu" ) +const offsetOfU64 = unsafe.Offsetof(Value{}.u64) +const offsetOfPtr = unsafe.Offsetof(Value{}.ptr) + // ColumnBuffer is an interface representing columns of a row group. // // ColumnBuffer implements sort.Interface as a way to support reordering the @@ -103,6 +107,29 @@ func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitio }, nil } +// On a big endian system, a boolean/byte value, which is in little endian byte format, is byte aligned +// to the 7th byte in a u64 (8 bytes) variable.. Hence the data will be available at 7th byte when +// interpreted as a little endian byte format. So, in order to access a boolean/byte value out of u64 variable, +// we need to add an offset of "7"... +// In the same way, an int32/uint32/float value, which is in little endian byte format, is byte aligned +// to the 4th byte in a u64 (8 bytes) variable.. Hence the data will be available at 4th byte when +// interpreted as a little endian byte format. 
So, in order to access an int32/uint32/float value out of u64 variable, +// we need to add an offset of "4" +func getOffset(colDict interface{}) uintptr { + var offset uintptr = 0 + + if cpu.IsBigEndian { + switch colDict.(type) { + case booleanColumnBuffer, booleanDictionary: + offset = 7 + + case int32ColumnBuffer, uint32ColumnBuffer, floatColumnBuffer, int32Dictionary, floatDictionary, uint32Dictionary: + offset = 4 + } + } + return offset +} + type nullableColumnIndex struct { ColumnIndex maxDefinitionLevel byte @@ -828,8 +855,8 @@ func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) { } func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + offset := getOffset(*col) + col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) return len(values), nil } @@ -958,7 +985,7 @@ func (col *int32ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 4) != 0 { return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToInt32(b)...) + col.values = append(col.values, unsafecast.Slice[int32](b)...) return len(b), nil } @@ -968,8 +995,8 @@ func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) { } func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + offset := getOffset(*col) + col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) return len(values), nil } @@ -1057,7 +1084,7 @@ func (col *int64ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 8) != 0 { return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToInt64(b)...) + col.values = append(col.values, unsafecast.Slice[int64](b)...) 
return len(b), nil } @@ -1067,8 +1094,7 @@ func (col *int64ColumnBuffer) WriteInt64s(values []int64) (int, error) { } func (col *int64ColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{}) return len(values), nil } @@ -1155,7 +1181,7 @@ func (col *int96ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 12) != 0 { return 0, fmt.Errorf("cannot write INT96 values from input of size %d", len(b)) } - col.values = append(col.values, deprecated.BytesToInt96(b)...) + col.values = append(col.values, unsafecast.Slice[deprecated.Int96](b)...) return len(b), nil } @@ -1252,7 +1278,7 @@ func (col *floatColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 4) != 0 { return 0, fmt.Errorf("cannot write FLOAT values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToFloat32(b)...) + col.values = append(col.values, unsafecast.Slice[float32](b)...) return len(b), nil } @@ -1262,8 +1288,8 @@ func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) { } func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + offset := getOffset(*col) + col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) return len(values), nil } @@ -1350,7 +1376,7 @@ func (col *doubleColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 8) != 0 { return 0, fmt.Errorf("cannot write DOUBLE values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToFloat64(b)...) + col.values = append(col.values, unsafecast.Slice[float64](b)...) 
return len(b), nil } @@ -1360,8 +1386,7 @@ func (col *doubleColumnBuffer) WriteDoubles(values []float64) (int, error) { } func (col *doubleColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{}) return len(values), nil } @@ -1505,7 +1530,7 @@ func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes i baseBytes := len(col.values) + (plain.ByteArrayLengthSize * len(col.lengths)) err = plain.RangeByteArray(values, func(value []byte) error { - col.append(unsafecast.BytesToString(value)) + col.append(unsafecast.String(value)) return nil }) @@ -1515,8 +1540,7 @@ func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes i } func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.ptr)), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfPtr), columnLevels{}) return len(values), nil } @@ -1742,7 +1766,7 @@ func (col *uint32ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 4) != 0 { return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToUint32(b)...) + col.values = append(col.values, unsafecast.Slice[uint32](b)...) 
return len(b), nil } @@ -1752,8 +1776,8 @@ func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) { } func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + offset := getOffset(*col) + col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) return len(values), nil } @@ -1840,7 +1864,7 @@ func (col *uint64ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 8) != 0 { return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToUint64(b)...) + col.values = append(col.values, unsafecast.Slice[uint64](b)...) return len(b), nil } @@ -1850,8 +1874,7 @@ func (col *uint64ColumnBuffer) WriteUint64s(values []uint64) (int, error) { } func (col *uint64ColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{}) return len(values), nil } @@ -2356,8 +2379,8 @@ func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath) writeRo mapKey.SetIterKey(it) mapValue.SetIterValue(it) - k := makeArray(unsafecast.PointerOfValue(mapKey), 1, keySize) - v := makeArray(unsafecast.PointerOfValue(mapValue), 1, valueSize) + k := makeArray(reflectValueData(mapKey), 1, keySize) + v := makeArray(reflectValueData(mapValue), 1, valueSize) if err := writeKeyValues(columns, k, v, elemLevels); err != nil { return err @@ -2440,7 +2463,7 @@ func writeRowsFuncOfTime(_ reflect.Type, schema *Schema, path columnPath) writeR val = t.UnixNano() } - a := makeArray(unsafecast.PointerOfValue(reflect.ValueOf(val)), 1, elemSize) + a := makeArray(reflectValueData(reflect.ValueOf(val)), 1, elemSize) if err := writeRows(columns, a, levels); err != nil { return err } diff --git 
a/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go b/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go index 9f41875f2db..45717269965 100644 --- a/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go +++ b/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go @@ -10,7 +10,7 @@ import ( ) func broadcastValueInt32(dst []int32, src int8) { - bytealg.Broadcast(unsafecast.Int32ToBytes(dst), byte(src)) + bytealg.Broadcast(unsafecast.Slice[byte](dst), byte(src)) } //go:noescape diff --git a/vendor/github.com/parquet-go/parquet-go/column_index_be.go b/vendor/github.com/parquet-go/parquet-go/column_index_be.go new file mode 100644 index 00000000000..f3ea2e7bdfb --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/column_index_be.go @@ -0,0 +1,854 @@ +// This file gets added on all the big-endian CPU architectures. + +//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64 + +package parquet + +import ( + "encoding/binary" + "github.com/parquet-go/parquet-go/deprecated" + "github.com/parquet-go/parquet-go/encoding/plain" + "github.com/parquet-go/parquet-go/format" + "github.com/parquet-go/parquet-go/internal/unsafecast" + "math" +) + +type ColumnIndex interface { + // NumPages returns the number of paged in the column index. + NumPages() int + + // Returns the number of null values in the page at the given index. + NullCount(int) int64 + + // Tells whether the page at the given index contains null values only. + NullPage(int) bool + + // PageIndex return min/max bounds for the page at the given index in the + // column. + MinValue(int) Value + MaxValue(int) Value + + // IsAscending returns true if the column index min/max values are sorted + // in ascending order (based on the ordering rules of the column's logical + // type). 
+ IsAscending() bool + + // IsDescending returns true if the column index min/max values are sorted + // in descending order (based on the ordering rules of the column's logical + // type). + IsDescending() bool +} + +// NewColumnIndex constructs a ColumnIndex instance from the given parquet +// format column index. The kind argument configures the type of values +func NewColumnIndex(kind Kind, index *format.ColumnIndex) ColumnIndex { + return &formatColumnIndex{ + kind: kind, + index: index, + } +} + +type formatColumnIndex struct { + kind Kind + index *format.ColumnIndex +} + +func (f *formatColumnIndex) NumPages() int { + return len(f.index.MinValues) +} + +func (f *formatColumnIndex) NullCount(i int) int64 { + if len(f.index.NullCounts) > 0 { + return f.index.NullCounts[i] + } + return 0 +} + +func (f *formatColumnIndex) NullPage(i int) bool { + return len(f.index.NullPages) > 0 && f.index.NullPages[i] +} + +func (f *formatColumnIndex) MinValue(i int) Value { + if f.NullPage(i) { + return Value{} + } + return f.kind.Value(f.index.MinValues[i]) +} + +func (f *formatColumnIndex) MaxValue(i int) Value { + if f.NullPage(i) { + return Value{} + } + return f.kind.Value(f.index.MaxValues[i]) +} + +func (f *formatColumnIndex) IsAscending() bool { + return f.index.BoundaryOrder == format.Ascending +} + +func (f *formatColumnIndex) IsDescending() bool { + return f.index.BoundaryOrder == format.Descending +} + +type fileColumnIndex struct{ chunk *fileColumnChunk } + +func (i fileColumnIndex) NumPages() int { + return len(i.columnIndex().NullPages) +} + +func (i fileColumnIndex) NullCount(j int) int64 { + index := i.columnIndex() + if len(index.NullCounts) > 0 { + return index.NullCounts[j] + } + return 0 +} + +func (i fileColumnIndex) NullPage(j int) bool { + return isNullPage(j, i.columnIndex()) +} + +func (i fileColumnIndex) MinValue(j int) Value { + index := i.columnIndex() + if isNullPage(j, index) { + return Value{} + } + return i.makeValue(index.MinValues[j]) +} + 
+func (i fileColumnIndex) MaxValue(j int) Value { + index := i.columnIndex() + if isNullPage(j, index) { + return Value{} + } + return i.makeValue(index.MaxValues[j]) +} + +func (i fileColumnIndex) IsAscending() bool { + return i.columnIndex().BoundaryOrder == format.Ascending +} + +func (i fileColumnIndex) IsDescending() bool { + return i.columnIndex().BoundaryOrder == format.Descending +} + +func (i *fileColumnIndex) makeValue(b []byte) Value { + return i.chunk.column.typ.Kind().Value(b) +} + +func (i fileColumnIndex) columnIndex() *format.ColumnIndex { return i.chunk.columnIndex.Load() } +func isNullPage(j int, index *format.ColumnIndex) bool { + return len(index.NullPages) > 0 && index.NullPages[j] +} + +type emptyColumnIndex struct{} + +func (emptyColumnIndex) NumPages() int { return 0 } +func (emptyColumnIndex) NullCount(int) int64 { return 0 } +func (emptyColumnIndex) NullPage(int) bool { return false } +func (emptyColumnIndex) MinValue(int) Value { return Value{} } +func (emptyColumnIndex) MaxValue(int) Value { return Value{} } +func (emptyColumnIndex) IsAscending() bool { return false } +func (emptyColumnIndex) IsDescending() bool { return false } + +type booleanColumnIndex struct{ page *booleanPage } + +func (i booleanColumnIndex) NumPages() int { return 1 } +func (i booleanColumnIndex) NullCount(int) int64 { return 0 } +func (i booleanColumnIndex) NullPage(int) bool { return false } +func (i booleanColumnIndex) MinValue(int) Value { return makeValueBoolean(i.page.min()) } +func (i booleanColumnIndex) MaxValue(int) Value { return makeValueBoolean(i.page.max()) } +func (i booleanColumnIndex) IsAscending() bool { return false } +func (i booleanColumnIndex) IsDescending() bool { return false } + +type int32ColumnIndex struct{ page *int32Page } + +func (i int32ColumnIndex) NumPages() int { return 1 } +func (i int32ColumnIndex) NullCount(int) int64 { return 0 } +func (i int32ColumnIndex) NullPage(int) bool { return false } +func (i int32ColumnIndex) 
MinValue(int) Value { return makeValueInt32(i.page.min()) } +func (i int32ColumnIndex) MaxValue(int) Value { return makeValueInt32(i.page.max()) } +func (i int32ColumnIndex) IsAscending() bool { return false } +func (i int32ColumnIndex) IsDescending() bool { return false } + +type int64ColumnIndex struct{ page *int64Page } + +func (i int64ColumnIndex) NumPages() int { return 1 } +func (i int64ColumnIndex) NullCount(int) int64 { return 0 } +func (i int64ColumnIndex) NullPage(int) bool { return false } +func (i int64ColumnIndex) MinValue(int) Value { return makeValueInt64(i.page.min()) } +func (i int64ColumnIndex) MaxValue(int) Value { return makeValueInt64(i.page.max()) } +func (i int64ColumnIndex) IsAscending() bool { return false } +func (i int64ColumnIndex) IsDescending() bool { return false } + +type int96ColumnIndex struct{ page *int96Page } + +func (i int96ColumnIndex) NumPages() int { return 1 } +func (i int96ColumnIndex) NullCount(int) int64 { return 0 } +func (i int96ColumnIndex) NullPage(int) bool { return false } +func (i int96ColumnIndex) MinValue(int) Value { return makeValueInt96(i.page.min()) } +func (i int96ColumnIndex) MaxValue(int) Value { return makeValueInt96(i.page.max()) } +func (i int96ColumnIndex) IsAscending() bool { return false } +func (i int96ColumnIndex) IsDescending() bool { return false } + +type floatColumnIndex struct{ page *floatPage } + +func (i floatColumnIndex) NumPages() int { return 1 } +func (i floatColumnIndex) NullCount(int) int64 { return 0 } +func (i floatColumnIndex) NullPage(int) bool { return false } +func (i floatColumnIndex) MinValue(int) Value { return makeValueFloat(i.page.min()) } +func (i floatColumnIndex) MaxValue(int) Value { return makeValueFloat(i.page.max()) } +func (i floatColumnIndex) IsAscending() bool { return false } +func (i floatColumnIndex) IsDescending() bool { return false } + +type doubleColumnIndex struct{ page *doublePage } + +func (i doubleColumnIndex) NumPages() int { return 1 } +func (i 
doubleColumnIndex) NullCount(int) int64 { return 0 } +func (i doubleColumnIndex) NullPage(int) bool { return false } +func (i doubleColumnIndex) MinValue(int) Value { return makeValueDouble(i.page.min()) } +func (i doubleColumnIndex) MaxValue(int) Value { return makeValueDouble(i.page.max()) } +func (i doubleColumnIndex) IsAscending() bool { return false } +func (i doubleColumnIndex) IsDescending() bool { return false } + +type byteArrayColumnIndex struct{ page *byteArrayPage } + +func (i byteArrayColumnIndex) NumPages() int { return 1 } +func (i byteArrayColumnIndex) NullCount(int) int64 { return 0 } +func (i byteArrayColumnIndex) NullPage(int) bool { return false } +func (i byteArrayColumnIndex) MinValue(int) Value { return makeValueBytes(ByteArray, i.page.min()) } +func (i byteArrayColumnIndex) MaxValue(int) Value { return makeValueBytes(ByteArray, i.page.max()) } +func (i byteArrayColumnIndex) IsAscending() bool { return false } +func (i byteArrayColumnIndex) IsDescending() bool { return false } + +type fixedLenByteArrayColumnIndex struct{ page *fixedLenByteArrayPage } + +func (i fixedLenByteArrayColumnIndex) NumPages() int { return 1 } +func (i fixedLenByteArrayColumnIndex) NullCount(int) int64 { return 0 } +func (i fixedLenByteArrayColumnIndex) NullPage(int) bool { return false } +func (i fixedLenByteArrayColumnIndex) MinValue(int) Value { + return makeValueBytes(FixedLenByteArray, i.page.min()) +} +func (i fixedLenByteArrayColumnIndex) MaxValue(int) Value { + return makeValueBytes(FixedLenByteArray, i.page.max()) +} +func (i fixedLenByteArrayColumnIndex) IsAscending() bool { return false } +func (i fixedLenByteArrayColumnIndex) IsDescending() bool { return false } + +type uint32ColumnIndex struct{ page *uint32Page } + +func (i uint32ColumnIndex) NumPages() int { return 1 } +func (i uint32ColumnIndex) NullCount(int) int64 { return 0 } +func (i uint32ColumnIndex) NullPage(int) bool { return false } +func (i uint32ColumnIndex) MinValue(int) Value { return 
makeValueUint32(i.page.min()) } +func (i uint32ColumnIndex) MaxValue(int) Value { return makeValueUint32(i.page.max()) } +func (i uint32ColumnIndex) IsAscending() bool { return false } +func (i uint32ColumnIndex) IsDescending() bool { return false } + +type uint64ColumnIndex struct{ page *uint64Page } + +func (i uint64ColumnIndex) NumPages() int { return 1 } +func (i uint64ColumnIndex) NullCount(int) int64 { return 0 } +func (i uint64ColumnIndex) NullPage(int) bool { return false } +func (i uint64ColumnIndex) MinValue(int) Value { return makeValueUint64(i.page.min()) } +func (i uint64ColumnIndex) MaxValue(int) Value { return makeValueUint64(i.page.max()) } +func (i uint64ColumnIndex) IsAscending() bool { return false } +func (i uint64ColumnIndex) IsDescending() bool { return false } + +type be128ColumnIndex struct{ page *be128Page } + +func (i be128ColumnIndex) NumPages() int { return 1 } +func (i be128ColumnIndex) NullCount(int) int64 { return 0 } +func (i be128ColumnIndex) NullPage(int) bool { return false } +func (i be128ColumnIndex) MinValue(int) Value { return makeValueBytes(FixedLenByteArray, i.page.min()) } +func (i be128ColumnIndex) MaxValue(int) Value { return makeValueBytes(FixedLenByteArray, i.page.max()) } +func (i be128ColumnIndex) IsAscending() bool { return false } +func (i be128ColumnIndex) IsDescending() bool { return false } + +// The ColumnIndexer interface is implemented by types that support generating +// parquet column indexes. +// +// The package does not export any types that implement this interface, programs +// must call NewColumnIndexer on a Type instance to construct column indexers. +type ColumnIndexer interface { + // Resets the column indexer state. + Reset() + + // Add a page to the column indexer. + IndexPage(numValues, numNulls int64, min, max Value) + + // Generates a format.ColumnIndex value from the current state of the + // column indexer. 
+ // + // The returned value may reference internal buffers, in which case the + // values remain valid until the next call to IndexPage or Reset on the + // column indexer. + ColumnIndex() format.ColumnIndex +} + +type baseColumnIndexer struct { + nullPages []bool + nullCounts []int64 +} + +func (i *baseColumnIndexer) reset() { + i.nullPages = i.nullPages[:0] + i.nullCounts = i.nullCounts[:0] +} + +func (i *baseColumnIndexer) observe(numValues, numNulls int64) { + i.nullPages = append(i.nullPages, numValues == numNulls) + i.nullCounts = append(i.nullCounts, numNulls) +} + +func (i *baseColumnIndexer) columnIndex(minValues, maxValues [][]byte, minOrder, maxOrder int) format.ColumnIndex { + nullPages := make([]bool, len(i.nullPages)) + copy(nullPages, i.nullPages) + nullCounts := make([]int64, len(i.nullCounts)) + copy(nullCounts, i.nullCounts) + return format.ColumnIndex{ + NullPages: nullPages, + NullCounts: nullCounts, + MinValues: minValues, + MaxValues: maxValues, + BoundaryOrder: boundaryOrderOf(minOrder, maxOrder), + } +} + +type booleanColumnIndexer struct { + baseColumnIndexer + minValues []bool + maxValues []bool +} + +func newBooleanColumnIndexer() *booleanColumnIndexer { + return new(booleanColumnIndexer) +} + +func (i *booleanColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.boolean()) + i.maxValues = append(i.maxValues, max.boolean()) +} + +func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex { + return i.columnIndex( + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 1), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 1), + orderOfBool(i.minValues), + orderOfBool(i.maxValues), + ) +} + +type int32ColumnIndexer struct { + baseColumnIndexer + minValues []int32 + maxValues []int32 +} + +func 
newInt32ColumnIndexer() *int32ColumnIndexer { + return new(int32ColumnIndexer) +} + +func (i *int32ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.int32()) + i.maxValues = append(i.maxValues, max.int32()) +} + +func reverseInt32MinMaxValues(mLen int, mVal []int32) []byte { + buf := make([]byte, mLen*4) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint32(buf[idx:(4+idx)], uint32(mVal[k])) + idx += 4 + } + return buf +} + +func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseInt32MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseInt32MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 4), + splitFixedLenByteArrays(byteMax, 4), + orderOfInt32(i.minValues), + orderOfInt32(i.maxValues), + ) +} + +type int64ColumnIndexer struct { + baseColumnIndexer + minValues []int64 + maxValues []int64 +} + +func newInt64ColumnIndexer() *int64ColumnIndexer { + return new(int64ColumnIndexer) +} + +func (i *int64ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.int64()) + i.maxValues = append(i.maxValues, max.int64()) +} + +func reverseInt64MinMaxValues(mLen int, mVal []int64) []byte { + buf := make([]byte, mLen*8) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint64(buf[idx:(8+idx)], uint64(mVal[k])) + idx += 8 + } + return buf +} + +func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseInt64MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseInt64MinMaxValues(len(i.maxValues), i.maxValues) + + 
return i.columnIndex( + splitFixedLenByteArrays(byteMin, 8), + splitFixedLenByteArrays(byteMax, 8), + orderOfInt64(i.minValues), + orderOfInt64(i.maxValues), + ) +} + +type int96ColumnIndexer struct { + baseColumnIndexer + minValues []deprecated.Int96 + maxValues []deprecated.Int96 +} + +func newInt96ColumnIndexer() *int96ColumnIndexer { + return new(int96ColumnIndexer) +} + +func (i *int96ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.Int96()) + i.maxValues = append(i.maxValues, max.Int96()) +} + +func reverseInt96MinMaxValues(mLen int, mVal []deprecated.Int96) []byte { + buf := make([]byte, mLen*12) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint32(buf[idx:(4+idx)], uint32(mVal[k][0])) + binary.LittleEndian.PutUint32(buf[(4+idx):(8+idx)], uint32(mVal[k][1])) + binary.LittleEndian.PutUint32(buf[(8+idx):(12+idx)], uint32(mVal[k][2])) + idx += 12 + } + return buf +} + +func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseInt96MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseInt96MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 12), + splitFixedLenByteArrays(byteMax, 12), + deprecated.OrderOfInt96(i.minValues), + deprecated.OrderOfInt96(i.maxValues), + ) +} + +type floatColumnIndexer struct { + baseColumnIndexer + minValues []float32 + maxValues []float32 +} + +func newFloatColumnIndexer() *floatColumnIndexer { + return new(floatColumnIndexer) +} + +func (i *floatColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.float()) + 
i.maxValues = append(i.maxValues, max.float()) +} + +func reverseFloatMinMaxValues(mLen int, mVal []float32) []byte { + buf := make([]byte, mLen*4) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint32(buf[idx:(4+idx)], math.Float32bits(mVal[k])) + idx += 4 + } + return buf +} + +func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseFloatMinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseFloatMinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 4), + splitFixedLenByteArrays(byteMax, 4), + orderOfFloat32(i.minValues), + orderOfFloat32(i.maxValues), + ) +} + +type doubleColumnIndexer struct { + baseColumnIndexer + minValues []float64 + maxValues []float64 +} + +func newDoubleColumnIndexer() *doubleColumnIndexer { + return new(doubleColumnIndexer) +} + +func (i *doubleColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.double()) + i.maxValues = append(i.maxValues, max.double()) +} + +func reverseDoubleMinMaxValues(mLen int, mVal []float64) []byte { + buf := make([]byte, mLen*8) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint64(buf[idx:(8+idx)], math.Float64bits(mVal[k])) + idx += 8 + } + return buf +} + +func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseDoubleMinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseDoubleMinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 8), + splitFixedLenByteArrays(byteMax, 8), + orderOfFloat64(i.minValues), + orderOfFloat64(i.maxValues), + ) +} + +type byteArrayColumnIndexer struct { + baseColumnIndexer + sizeLimit int + minValues []byte + maxValues []byte +} + +func newByteArrayColumnIndexer(sizeLimit 
int) *byteArrayColumnIndexer { + return &byteArrayColumnIndexer{sizeLimit: sizeLimit} +} + +func (i *byteArrayColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *byteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = plain.AppendByteArray(i.minValues, min.byteArray()) + i.maxValues = plain.AppendByteArray(i.maxValues, max.byteArray()) +} + +func (i *byteArrayColumnIndexer) ColumnIndex() format.ColumnIndex { + minValues := splitByteArrays(i.minValues) + maxValues := splitByteArrays(i.maxValues) + if sizeLimit := i.sizeLimit; sizeLimit > 0 { + for i, v := range minValues { + minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit) + } + for i, v := range maxValues { + maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit) + } + } + return i.columnIndex( + minValues, + maxValues, + orderOfBytes(minValues), + orderOfBytes(maxValues), + ) +} + +type fixedLenByteArrayColumnIndexer struct { + baseColumnIndexer + size int + sizeLimit int + minValues []byte + maxValues []byte +} + +func newFixedLenByteArrayColumnIndexer(size, sizeLimit int) *fixedLenByteArrayColumnIndexer { + return &fixedLenByteArrayColumnIndexer{ + size: size, + sizeLimit: sizeLimit, + } +} + +func (i *fixedLenByteArrayColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *fixedLenByteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.byteArray()...) + i.maxValues = append(i.maxValues, max.byteArray()...) 
+} + +func (i *fixedLenByteArrayColumnIndexer) ColumnIndex() format.ColumnIndex { + minValues := splitFixedLenByteArrays(i.minValues, i.size) + maxValues := splitFixedLenByteArrays(i.maxValues, i.size) + if sizeLimit := i.sizeLimit; sizeLimit > 0 { + for i, v := range minValues { + minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit) + } + for i, v := range maxValues { + maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit) + } + } + return i.columnIndex( + minValues, + maxValues, + orderOfBytes(minValues), + orderOfBytes(maxValues), + ) +} + +type uint32ColumnIndexer struct { + baseColumnIndexer + minValues []uint32 + maxValues []uint32 +} + +func newUint32ColumnIndexer() *uint32ColumnIndexer { + return new(uint32ColumnIndexer) +} + +func (i *uint32ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.uint32()) + i.maxValues = append(i.maxValues, max.uint32()) +} + +func reverseUint32MinMaxValues(mLen int, mVal []uint32) []byte { + buf := make([]byte, mLen*4) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint32(buf[idx:(4+idx)], mVal[k]) + idx += 4 + } + return buf +} + +func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseUint32MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseUint32MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 4), + splitFixedLenByteArrays(byteMax, 4), + orderOfUint32(i.minValues), + orderOfUint32(i.maxValues), + ) +} + +type uint64ColumnIndexer struct { + baseColumnIndexer + minValues []uint64 + maxValues []uint64 +} + +func newUint64ColumnIndexer() *uint64ColumnIndexer { + return new(uint64ColumnIndexer) +} + +func (i *uint64ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues 
= i.maxValues[:0] +} + +func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.uint64()) + i.maxValues = append(i.maxValues, max.uint64()) +} + +func reverseUint64MinMaxValues(mLen int, mVal []uint64) []byte { + buf := make([]byte, mLen*8) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint64(buf[idx:(8+idx)], mVal[k]) + idx += 8 + } + return buf +} + +func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseUint64MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseUint64MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 8), + splitFixedLenByteArrays(byteMax, 8), + orderOfUint64(i.minValues), + orderOfUint64(i.maxValues), + ) +} + +type be128ColumnIndexer struct { + baseColumnIndexer + minValues [][16]byte + maxValues [][16]byte +} + +func newBE128ColumnIndexer() *be128ColumnIndexer { + return new(be128ColumnIndexer) +} + +func (i *be128ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + if !min.IsNull() { + i.minValues = append(i.minValues, *(*[16]byte)(min.byteArray())) + } + if !max.IsNull() { + i.maxValues = append(i.maxValues, *(*[16]byte)(max.byteArray())) + } +} + +func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex { + minValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 16) + maxValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 16) + return i.columnIndex( + minValues, + maxValues, + orderOfBytes(minValues), + orderOfBytes(maxValues), + ) +} + +func truncateLargeMinByteArrayValue(value []byte, sizeLimit int) []byte { + if len(value) > sizeLimit { + value = value[:sizeLimit] + } + return value +} + +// 
truncateLargeMaxByteArrayValue truncates the given byte array to the given size limit. +// If the given byte array is truncated, it is incremented by 1 in place. +func truncateLargeMaxByteArrayValue(value []byte, sizeLimit int) []byte { + if len(value) > sizeLimit { + value = value[:sizeLimit] + incrementByteArrayInplace(value) + } + return value +} + +// incrementByteArray increments the given byte array by 1. +// Reference: https://github.com/apache/parquet-java/blob/master/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java#L124 +func incrementByteArrayInplace(value []byte) { + for i := len(value) - 1; i >= 0; i-- { + value[i]++ + if value[i] != 0 { // Did not overflow: 0xFF -> 0x00 + return + } + } + // Fully overflowed, so restore all to 0xFF + for i := range value { + value[i] = 0xFF + } +} + +func splitByteArrays(data []byte) [][]byte { + length := 0 + plain.RangeByteArray(data, func([]byte) error { + length++ + return nil + }) + buffer := make([]byte, 0, len(data)-(4*length)) + values := make([][]byte, 0, length) + plain.RangeByteArray(data, func(value []byte) error { + offset := len(buffer) + buffer = append(buffer, value...) 
+ values = append(values, buffer[offset:]) + return nil + }) + return values +} + +func splitFixedLenByteArrays(data []byte, size int) [][]byte { + data = copyBytes(data) + values := make([][]byte, len(data)/size) + for i := range values { + j := (i + 0) * size + k := (i + 1) * size + values[i] = data[j:k:k] + } + return values +} + +func boundaryOrderOf(minOrder, maxOrder int) format.BoundaryOrder { + if minOrder == maxOrder { + switch { + case minOrder > 0: + return format.Ascending + case minOrder < 0: + return format.Descending + } + } + return format.Unordered +} diff --git a/vendor/github.com/parquet-go/parquet-go/column_index.go b/vendor/github.com/parquet-go/parquet-go/column_index_le.go similarity index 90% rename from vendor/github.com/parquet-go/parquet-go/column_index.go rename to vendor/github.com/parquet-go/parquet-go/column_index_le.go index 1252248b6ce..4d8fec4511a 100644 --- a/vendor/github.com/parquet-go/parquet-go/column_index.go +++ b/vendor/github.com/parquet-go/parquet-go/column_index_le.go @@ -1,3 +1,7 @@ +// This file gets added on all the little-endian CPU architectures. 
+ +//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm + package parquet import ( @@ -87,46 +91,55 @@ func (f *formatColumnIndex) IsDescending() bool { type fileColumnIndex struct{ chunk *fileColumnChunk } func (i fileColumnIndex) NumPages() int { - return len(i.chunk.columnIndex.NullPages) + return len(i.columnIndex().NullPages) } func (i fileColumnIndex) NullCount(j int) int64 { - if len(i.chunk.columnIndex.NullCounts) > 0 { - return i.chunk.columnIndex.NullCounts[j] + index := i.columnIndex() + if len(index.NullCounts) > 0 { + return index.NullCounts[j] } return 0 } func (i fileColumnIndex) NullPage(j int) bool { - return len(i.chunk.columnIndex.NullPages) > 0 && i.chunk.columnIndex.NullPages[j] + return isNullPage(j, i.columnIndex()) } func (i fileColumnIndex) MinValue(j int) Value { - if i.NullPage(j) { + index := i.columnIndex() + if isNullPage(j, index) { return Value{} } - return i.makeValue(i.chunk.columnIndex.MinValues[j]) + return i.makeValue(index.MinValues[j]) } func (i fileColumnIndex) MaxValue(j int) Value { - if i.NullPage(j) { + index := i.columnIndex() + if isNullPage(j, index) { return Value{} } - return i.makeValue(i.chunk.columnIndex.MaxValues[j]) + return i.makeValue(index.MaxValues[j]) } func (i fileColumnIndex) IsAscending() bool { - return i.chunk.columnIndex.BoundaryOrder == format.Ascending + return i.columnIndex().BoundaryOrder == format.Ascending } func (i fileColumnIndex) IsDescending() bool { - return i.chunk.columnIndex.BoundaryOrder == format.Descending + return i.columnIndex().BoundaryOrder == format.Descending } func (i *fileColumnIndex) makeValue(b []byte) Value { return i.chunk.column.typ.Kind().Value(b) } +func (i fileColumnIndex) columnIndex() *format.ColumnIndex { return i.chunk.columnIndex.Load() } + +func isNullPage(j int, index *format.ColumnIndex) bool { + return len(index.NullPages) > 0 && index.NullPages[j] +} 
+ type emptyColumnIndex struct{} func (emptyColumnIndex) NumPages() int { return 0 } @@ -325,8 +338,8 @@ func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Val func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.BoolToBytes(i.minValues), 1), - splitFixedLenByteArrays(unsafecast.BoolToBytes(i.maxValues), 1), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 1), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 1), orderOfBool(i.minValues), orderOfBool(i.maxValues), ) @@ -356,8 +369,8 @@ func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.minValues), 4), - splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.maxValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4), orderOfInt32(i.minValues), orderOfInt32(i.maxValues), ) @@ -387,8 +400,8 @@ func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.minValues), 8), - splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.maxValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8), orderOfInt64(i.minValues), orderOfInt64(i.maxValues), ) @@ -418,8 +431,8 @@ func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(deprecated.Int96ToBytes(i.minValues), 12), - splitFixedLenByteArrays(deprecated.Int96ToBytes(i.maxValues), 12), + 
splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 12), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 12), deprecated.OrderOfInt96(i.minValues), deprecated.OrderOfInt96(i.maxValues), ) @@ -449,8 +462,8 @@ func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.minValues), 4), - splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.maxValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4), orderOfFloat32(i.minValues), orderOfFloat32(i.maxValues), ) @@ -480,8 +493,8 @@ func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.minValues), 8), - splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.maxValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8), orderOfFloat64(i.minValues), orderOfFloat64(i.maxValues), ) @@ -599,8 +612,8 @@ func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.minValues), 4), - splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.maxValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4), orderOfUint32(i.minValues), orderOfUint32(i.maxValues), ) @@ -630,8 +643,8 @@ func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - 
splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.minValues), 8), - splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.maxValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8), orderOfUint64(i.minValues), orderOfUint64(i.maxValues), ) @@ -664,8 +677,8 @@ func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value } func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex { - minValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.minValues), 16) - maxValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.maxValues), 16) + minValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 16) + maxValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 16) return i.columnIndex( minValues, maxValues, diff --git a/vendor/github.com/parquet-go/parquet-go/convert.go b/vendor/github.com/parquet-go/parquet-go/convert.go index 8b315739c02..96a49a81f63 100644 --- a/vendor/github.com/parquet-go/parquet-go/convert.go +++ b/vendor/github.com/parquet-go/parquet-go/convert.go @@ -11,9 +11,12 @@ import ( "sync" "time" + "golang.org/x/sys/cpu" + "github.com/parquet-go/parquet-go/deprecated" "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" + "github.com/parquet-go/parquet-go/internal/unsafecast" ) // ConvertError is an error type returned by calls to Convert when the conversion @@ -912,7 +915,15 @@ func convertStringToInt96(v Value) (Value, error) { b := i.Bytes() c := make([]byte, 12) copy(c, b) - i96 := deprecated.BytesToInt96(c) + if cpu.IsBigEndian { + bufLen := len(c) + for idx := 0; idx < bufLen; idx = idx + 4 { + for m, n := (idx + 0), (idx + 3); m < n; m, n = m+1, n-1 { + c[m], c[n] = c[n], c[m] + } + } + } + i96 := unsafecast.Slice[deprecated.Int96](c) return v.convertToInt96(i96[0]), nil } diff --git a/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go 
b/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go index 1bed7a5d7b5..fc6d40648e8 100644 --- a/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go +++ b/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go @@ -3,7 +3,6 @@ package deprecated import ( "math/big" "math/bits" - "unsafe" ) // Int96 is an implementation of the deprecated INT96 parquet type. @@ -98,21 +97,6 @@ func (i Int96) Len() int { } } -// Int96ToBytes converts the slice of Int96 values to a slice of bytes sharing -// the same backing array. -func Int96ToBytes(data []Int96) []byte { - return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 12*len(data)) -} - -// BytesToInt96 converts the byte slice passed as argument to a slice of Int96 -// sharing the same backing array. -// -// When the number of bytes in the input is not a multiple of 12, the function -// truncates it in the returned slice. -func BytesToInt96(data []byte) []Int96 { - return unsafe.Slice(*(**Int96)(unsafe.Pointer(&data)), len(data)/12) -} - func MaxLenInt96(data []Int96) int { max := 0 for i := range data { diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary.go b/vendor/github.com/parquet-go/parquet-go/dictionary.go index 3bf2d97f0da..9dce0ff6514 100644 --- a/vendor/github.com/parquet-go/parquet-go/dictionary.go +++ b/vendor/github.com/parquet-go/parquet-go/dictionary.go @@ -29,7 +29,7 @@ const ( // // This constant is used to determine a useful chunk size depending on the // size of values being inserted in dictionaries. More values of small size - // can fit in CPU caches, so the inserts can operation on larger chunks. + // can fit in CPU caches, so the inserts can operate on larger chunks. 
insertsTargetCacheFootprint = 8192 ) @@ -140,8 +140,8 @@ func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(d.index(i) func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int(i)) } func (d *booleanDictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { @@ -171,7 +171,7 @@ func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { func (d *booleanDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(false) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.lookup(indexes, makeArrayValue(values, offsetOfU64)) } func (d *booleanDictionary) lookup(indexes []int32, rows sparse.Array) { @@ -238,8 +238,8 @@ func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *int32Dictionary) index(i int32) int32 { return d.values[i] } func (d *int32Dictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *int32Dictionary) init(indexes []int32) { @@ -291,7 +291,8 @@ func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *int32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -338,8 +339,7 @@ func (d *int64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d 
*int64Dictionary) index(i int32) int64 { return d.values[i] } func (d *int64Dictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.insert(indexes, makeArrayValue(values, offsetOfU64)) } func (d *int64Dictionary) init(indexes []int32) { @@ -378,7 +378,7 @@ func (d *int64Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *int64Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.lookup(indexes, makeArrayValue(values, offsetOfU64)) } func (d *int64Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -520,8 +520,8 @@ func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *floatDictionary) index(i int32) float32 { return d.values[i] } func (d *floatDictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *floatDictionary) init(indexes []int32) { @@ -560,7 +560,8 @@ func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) { func (d *floatDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) { @@ -607,8 +608,7 @@ func (d *doubleDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *doubleDictionary) index(i int32) float64 { return d.values[i] } func (d *doubleDictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + 
d.insert(indexes, makeArrayValue(values, offsetOfU64)) } func (d *doubleDictionary) init(indexes []int32) { @@ -647,7 +647,7 @@ func (d *doubleDictionary) insert(indexes []int32, rows sparse.Array) { func (d *doubleDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.lookup(indexes, makeArrayValue(values, offsetOfU64)) } func (d *doubleDictionary) Bounds(indexes []int32) (min, max Value) { @@ -706,8 +706,7 @@ func (d *byteArrayDictionary) Len() int { return d.len() } func (d *byteArrayDictionary) Index(i int32) Value { return d.makeValueBytes(d.index(int(i))) } func (d *byteArrayDictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) + d.insert(indexes, makeArrayValue(values, offsetOfPtr)) } func (d *byteArrayDictionary) init() { @@ -745,13 +744,13 @@ func (d *byteArrayDictionary) insert(indexes []int32, rows sparse.Array) { func (d *byteArrayDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) - d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) + d.lookupString(indexes, makeArrayValue(values, offsetOfPtr)) } func (d *byteArrayDictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { base := d.index(int(indexes[0])) - minValue := unsafecast.BytesToString(base) + minValue := unsafecast.String(base) maxValue := minValue values := [64]string{} @@ -864,13 +863,13 @@ func (d *fixedLenByteArrayDictionary) insertValues(indexes []int32, count int, v func (d *fixedLenByteArrayDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) - d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) + d.lookupString(indexes, makeArrayValue(values, offsetOfPtr)) } func (d 
*fixedLenByteArrayDictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { base := d.index(indexes[0]) - minValue := unsafecast.BytesToString(base) + minValue := unsafecast.String(base) maxValue := minValue values := [64]string{} @@ -931,8 +930,8 @@ func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] } func (d *uint32Dictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *uint32Dictionary) init(indexes []int32) { @@ -971,7 +970,8 @@ func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -1018,8 +1018,7 @@ func (d *uint64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *uint64Dictionary) index(i int32) uint64 { return d.values[i] } func (d *uint64Dictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.insert(indexes, makeArrayValue(values, offsetOfU64)) } func (d *uint64Dictionary) init(indexes []int32) { @@ -1058,7 +1057,7 @@ func (d *uint64Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *uint64Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.lookup(indexes, makeArrayValue(values, offsetOfU64)) } func (d *uint64Dictionary) 
Bounds(indexes []int32) (min, max Value) { @@ -1176,7 +1175,7 @@ func (d *be128Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *be128Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) - d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) + d.lookupString(indexes, makeArrayValue(values, offsetOfPtr)) } func (d *be128Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -1242,7 +1241,7 @@ func newIndexedPage(typ *indexedType, columnIndex int16, numValues int32, data e copy(tmp, values) values = tmp } else { - clear := values[len(values) : len(values)+size] + clear := values[len(values):size] for i := range clear { clear[i] = 0 } diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go b/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go index be7154ddbde..387e65f7592 100644 --- a/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go +++ b/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go @@ -47,25 +47,25 @@ func dictionaryLookupFixedLenByteArrayPointer(dict []byte, len int, indexes []in func (d *int32Dictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - dict := unsafecast.Int32ToUint32(d.values) + dict := unsafecast.Slice[uint32](d.values) dictionaryLookup32(dict, indexes, rows).check() } func (d *int64Dictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - dict := unsafecast.Int64ToUint64(d.values) + dict := unsafecast.Slice[uint64](d.values) dictionaryLookup64(dict, indexes, rows).check() } func (d *floatDictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - dict := unsafecast.Float32ToUint32(d.values) + dict := unsafecast.Slice[uint32](d.values) dictionaryLookup32(dict, indexes, rows).check() } func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) { 
checkLookupIndexBounds(indexes, rows) - dict := unsafecast.Float64ToUint64(d.values) + dict := unsafecast.Slice[uint64](d.values) dictionaryLookup64(dict, indexes, rows).check() } @@ -83,8 +83,7 @@ func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) { // //dictionaryLookupByteArrayString(d.offsets, d.values, indexes, rows).check() for i, j := range indexes { - v := d.index(int(j)) - *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v)) + *(*string)(rows.Index(i)) = unsafecast.String(d.index(int(j))) } } @@ -92,8 +91,7 @@ func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows sparse. checkLookupIndexBounds(indexes, rows) //dictionaryLookupFixedLenByteArrayString(d.data, d.size, indexes, rows).check() for i, j := range indexes { - v := d.index(j) - *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v)) + *(*string)(rows.Index(i)) = unsafecast.String(d.index(j)) } } @@ -109,7 +107,7 @@ func (d *uint64Dictionary) lookup(indexes []int32, rows sparse.Array) { func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - //dict := unsafecast.Uint128ToBytes(d.values) + //dict := unsafecast.Slice[byte](d.values) //dictionaryLookupFixedLenByteArrayString(dict, 16, indexes, rows).check() s := "0123456789ABCDEF" for i, j := range indexes { @@ -120,7 +118,7 @@ func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Array) { func (d *be128Dictionary) lookupPointer(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - //dict := unsafecast.Uint128ToBytes(d.values) + //dict := unsafecast.Slice[byte](d.values) //dictionaryLookupFixedLenByteArrayPointer(dict, 16, indexes, rows).check() for i, j := range indexes { *(**[16]byte)(rows.Index(i)) = d.index(j) diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go b/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go index a586897fdab..4893415250f 100644 --- 
a/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go @@ -5,6 +5,7 @@ package parquet import ( "unsafe" + "github.com/parquet-go/parquet-go/internal/unsafecast" "github.com/parquet-go/parquet-go/sparse" ) @@ -39,16 +40,14 @@ func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) { func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { - v := d.index(int(j)) - *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v)) + *(*string)(rows.Index(i)) = unsafecast.String(d.index(int(j))) } } func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { - v := d.index(j) - *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v)) + *(*string)(rows.Index(i)) = unsafecast.String(d.index(j)) } } diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go index f3a66441aa2..23b0202d7b0 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go @@ -22,13 +22,13 @@ func (e *Encoding) Encoding() format.Encoding { func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { dst = resize(dst, 4*len(src)) - encodeFloat(dst, unsafecast.Float32ToBytes(src)) + encodeFloat(dst, unsafecast.Slice[byte](src)) return dst, nil } func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { dst = resize(dst, 8*len(src)) - encodeDouble(dst, unsafecast.Float64ToBytes(src)) + encodeDouble(dst, unsafecast.Slice[byte](src)) return dst, nil } @@ -36,18 +36,18 @@ func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { if 
(len(src) % 4) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) } - buf := resize(unsafecast.Float32ToBytes(dst), len(src)) + buf := resize(unsafecast.Slice[byte](dst), len(src)) decodeFloat(buf, src) - return unsafecast.BytesToFloat32(buf), nil + return unsafecast.Slice[float32](buf), nil } func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { if (len(src) % 8) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) } - buf := resize(unsafecast.Float64ToBytes(dst), len(src)) + buf := resize(unsafecast.Slice[byte](dst), len(src)) decodeDouble(buf, src) - return unsafecast.BytesToFloat64(buf), nil + return unsafecast.Slice[float64](buf), nil } func resize(buf []byte, size int) []byte { diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go index 556fb8cce37..6f5bf15c795 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go @@ -11,7 +11,7 @@ func encodeFloat(dst, src []byte) { b2 := dst[2*n : 3*n] b3 := dst[3*n : 4*n] - for i, v := range unsafecast.BytesToUint32(src) { + for i, v := range unsafecast.Slice[uint32](src) { b0[i] = byte(v >> 0) b1[i] = byte(v >> 8) b2[i] = byte(v >> 16) @@ -30,7 +30,7 @@ func encodeDouble(dst, src []byte) { b6 := dst[6*n : 7*n] b7 := dst[7*n : 8*n] - for i, v := range unsafecast.BytesToUint64(src) { + for i, v := range unsafecast.Slice[uint64](src) { b0[i] = byte(v >> 0) b1[i] = byte(v >> 8) b2[i] = byte(v >> 16) @@ -49,7 +49,7 @@ func decodeFloat(dst, src []byte) { b2 := src[2*n : 3*n] b3 := src[3*n : 4*n] - dst32 := unsafecast.BytesToUint32(dst) + dst32 := unsafecast.Slice[uint32](dst) for i := range dst32 { dst32[i] = uint32(b0[i]) | uint32(b1[i])<<8 | @@ -69,7 +69,7 @@ func 
decodeDouble(dst, src []byte) { b6 := src[6*n : 7*n] b7 := src[7*n : 8*n] - dst64 := unsafecast.BytesToUint64(dst) + dst64 := unsafecast.Slice[uint64](dst) for i := range dst64 { dst64[i] = uint64(b0[i]) | uint64(b1[i])<<8 | diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go index 36704974fad..cf9d4cfc9ec 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go @@ -34,15 +34,15 @@ func (e *BinaryPackedEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, err } func (e *BinaryPackedEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { - buf := unsafecast.Int32ToBytes(dst) + buf := unsafecast.Slice[byte](dst) buf, _, err := decodeInt32(buf[:0], src) - return unsafecast.BytesToInt32(buf), e.wrap(err) + return unsafecast.Slice[int32](buf), e.wrap(err) } func (e *BinaryPackedEncoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { - buf := unsafecast.Int64ToBytes(dst) + buf := unsafecast.Slice[byte](dst) buf, _, err := decodeInt64(buf[:0], src) - return unsafecast.BytesToInt64(buf), e.wrap(err) + return unsafecast.Slice[int64](buf), e.wrap(err) } func (e *BinaryPackedEncoding) wrap(err error) error { @@ -290,7 +290,7 @@ func decodeInt32(dst, src []byte) ([]byte, []byte, error) { writeOffset := len(dst) dst = resize(dst, len(dst)+4*totalValues) - out := unsafecast.BytesToInt32(dst) + out := unsafecast.Slice[int32](dst) out[writeOffset] = int32(firstValue) writeOffset++ totalValues-- @@ -354,7 +354,7 @@ func decodeInt64(dst, src []byte) ([]byte, []byte, error) { writeOffset := len(dst) dst = resize(dst, len(dst)+8*totalValues) - out := unsafecast.BytesToInt64(dst) + out := unsafecast.Slice[int64](dst) out[writeOffset] = firstValue writeOffset++ totalValues-- diff --git 
a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go index 5da4c0e933d..11a5a538b1f 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go @@ -230,7 +230,7 @@ func decodeMiniBlockInt32(dst []int32, src []uint32, bitWidth uint) { case hasAVX2 && bitWidth <= 31: decodeMiniBlockInt32x27to31bitsAVX2(dst, src, bitWidth) case bitWidth == 32: - copy(dst, unsafecast.Uint32ToInt32(src)) + copy(dst, unsafecast.Slice[int32](src)) default: decodeMiniBlockInt32Default(dst, src, bitWidth) } @@ -249,7 +249,7 @@ func decodeMiniBlockInt64Default(dst []int64, src []uint32, bitWidth uint) func decodeMiniBlockInt64(dst []int64, src []uint32, bitWidth uint) { switch { case bitWidth == 64: - copy(dst, unsafecast.Uint32ToInt64(src)) + copy(dst, unsafecast.Slice[int64](src)) default: decodeMiniBlockInt64Default(dst, src, bitWidth) } diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go index 7b330eac89b..5fe27797754 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go @@ -20,8 +20,8 @@ func (buf *int32Buffer) resize(size int) { } func (buf *int32Buffer) decode(src []byte) ([]byte, error) { - values, remain, err := decodeInt32(unsafecast.Int32ToBytes(buf.values[:0]), src) - buf.values = unsafecast.BytesToInt32(values) + values, remain, err := decodeInt32(unsafecast.Slice[byte](buf.values[:0]), src) + buf.values = unsafecast.Slice[int32](values) return remain, err } diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go index a107e61d028..8a58af5c39d 100644 --- 
a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go @@ -6,6 +6,7 @@ package plain import ( "encoding/binary" "fmt" + "golang.org/x/sys/cpu" "io" "math" @@ -37,23 +38,67 @@ func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) { } func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { - return append(dst[:0], unsafecast.Int32ToBytes(src)...), nil + if cpu.IsBigEndian { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 4)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], uint32((src)[k])) + idx += 4 + } + return append(dst[:0], (byteEnc)...), nil + } else { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil + } } func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { - return append(dst[:0], unsafecast.Int64ToBytes(src)...), nil + if cpu.IsBigEndian { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 8)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], uint64((src)[k])) + idx += 8 + } + return append(dst[:0], (byteEnc)...), nil + } else { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil + } } func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { - return append(dst[:0], deprecated.Int96ToBytes(src)...), nil + return append(dst[:0], unsafecast.Slice[byte](src)...), nil } func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { - return append(dst[:0], unsafecast.Float32ToBytes(src)...), nil + if cpu.IsBigEndian { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 4)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], math.Float32bits((src)[k])) + idx += 4 + } + return append(dst[:0], (byteEnc)...), nil + } else { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil + } } func (e *Encoding) EncodeDouble(dst 
[]byte, src []float64) ([]byte, error) { - return append(dst[:0], unsafecast.Float64ToBytes(src)...), nil + if cpu.IsBigEndian { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 8)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], math.Float64bits((src)[k])) + idx += 8 + } + return append(dst[:0], (byteEnc)...), nil + } else { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil + } } func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) { @@ -86,35 +131,84 @@ func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { if (len(src) % 4) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) } - return append(dst[:0], unsafecast.BytesToInt32(src)...), nil + + if cpu.IsBigEndian { + srcLen := (len(src) / 4) + byteDec := make([]int32, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = int32(binary.LittleEndian.Uint32((src)[idx:(4 + idx)])) + idx += 4 + } + return append(dst[:0], (byteDec)...), nil + } else { + return append(dst[:0], unsafecast.Slice[int32](src)...), nil + } } func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { if (len(src) % 8) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) } - return append(dst[:0], unsafecast.BytesToInt64(src)...), nil + + if cpu.IsBigEndian { + srcLen := (len(src) / 8) + byteDec := make([]int64, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = int64(binary.LittleEndian.Uint64((src)[idx:(8 + idx)])) + idx += 8 + } + + return append(dst[:0], (byteDec)...), nil + } else { + return append(dst[:0], unsafecast.Slice[int64](src)...), nil + } } func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) { if (len(src) % 12) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src)) } - return append(dst[:0], deprecated.BytesToInt96(src)...), nil + return append(dst[:0], 
unsafecast.Slice[deprecated.Int96](src)...), nil } func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { if (len(src) % 4) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) } - return append(dst[:0], unsafecast.BytesToFloat32(src)...), nil + if cpu.IsBigEndian { + srcLen := (len(src) / 4) + byteDec := make([]float32, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = float32(math.Float32frombits(binary.LittleEndian.Uint32((src)[idx:(4 + idx)]))) + idx += 4 + } + + return append(dst[:0], (byteDec)...), nil + } else { + return append(dst[:0], unsafecast.Slice[float32](src)...), nil + } } func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { if (len(src) % 8) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) } - return append(dst[:0], unsafecast.BytesToFloat64(src)...), nil + if cpu.IsBigEndian { + srcLen := (len(src) / 8) + byteDec := make([]float64, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = float64(math.Float64frombits(binary.LittleEndian.Uint64((src)[idx:(8 + idx)]))) + idx += 8 + } + + return append(dst[:0], (byteDec)...), nil + } else { + return append(dst[:0], unsafecast.Slice[float64](src)...), nil + } } func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) { diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go b/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go index 763172de5b3..8304afc0188 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go @@ -31,9 +31,9 @@ func (e *DictionaryEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, erro if len(src) == 0 { return dst[:0], nil } - buf := unsafecast.Int32ToBytes(dst) + buf := unsafecast.Slice[byte](dst) buf, err := decodeInt32(buf[:0], src[1:], uint(src[0])) - return 
unsafecast.BytesToInt32(buf), e.wrap(err) + return unsafecast.Slice[int32](buf), e.wrap(err) } func (e *DictionaryEncoding) wrap(err error) error { diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go b/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go index 4b63ed6d42b..82ddaa1b512 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go @@ -11,6 +11,8 @@ import ( "io" "unsafe" + "golang.org/x/sys/cpu" + "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" "github.com/parquet-go/parquet-go/internal/bitpack" @@ -83,9 +85,9 @@ func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, error) { } func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { - buf := unsafecast.Int32ToBytes(dst) + buf := unsafecast.Slice[byte](dst) buf, err := decodeInt32(buf[:0], src, uint(e.BitWidth)) - return unsafecast.BytesToInt32(buf), e.wrap(err) + return unsafecast.Slice[int32](buf), e.wrap(err) } func (e *Encoding) wrap(err error) error { @@ -151,7 +153,17 @@ func encodeBytes(dst, src []byte, bitWidth uint) ([]byte, error) { } if len(src) >= 8 { - words := unsafe.Slice((*uint64)(unsafe.Pointer(&src[0])), len(src)/8) + words := unsafecast.Slice[uint64](src) + if cpu.IsBigEndian { + srcLen := (len(src) / 8) + idx := 0 + for k := range srcLen { + words[k] = binary.LittleEndian.Uint64((src)[idx:(8 + idx)]) + idx += 8 + } + } else { + words = unsafe.Slice((*uint64)(unsafe.Pointer(&src[0])), len(src)/8) + } for i := 0; i < len(words); { j := i @@ -196,14 +208,14 @@ func encodeInt32(dst []byte, src []int32, bitWidth uint) ([]byte, error) { return dst, errEncodeInvalidBitWidth("INT32", bitWidth) } if bitWidth == 0 { - if !isZero(unsafecast.Int32ToBytes(src)) { + if !isZero(unsafecast.Slice[byte](src)) { return dst, errEncodeInvalidBitWidth("INT32", bitWidth) } return appendUvarint(dst, uint64(len(src))<<1), nil } if 
len(src) >= 8 { - words := unsafe.Slice((*[8]int32)(unsafe.Pointer(&src[0])), len(src)/8) + words := unsafecast.Slice[[8]int32](src) for i := 0; i < len(words); { j := i @@ -373,7 +385,7 @@ func decodeInt32(dst, src []byte, bitWidth uint) ([]byte, error) { in = buf } - out := unsafecast.BytesToInt32(dst[offset:]) + out := unsafecast.Slice[int32](dst[offset:]) bitpack.UnpackInt32(out, in, bitWidth) i += length } else { @@ -385,6 +397,13 @@ func decodeInt32(dst, src []byte, bitWidth uint) ([]byte, error) { bits := [4]byte{} copy(bits[:], src[i:j]) + + //swap the bytes in the "bits" array to take care of big endian arch + if cpu.IsBigEndian { + for m, n := 0, 3; m < n; m, n = m+1, n-1 { + bits[m], bits[n] = bits[n], bits[m] + } + } dst = appendRepeat(dst, bits[:], count) i = j } @@ -500,7 +519,7 @@ func grow(buf []byte, size int) []byte { } func encodeInt32BitpackDefault(dst []byte, src [][8]int32, bitWidth uint) int { - bits := unsafe.Slice((*int32)(unsafe.Pointer(&src[0])), len(src)*8) + bits := unsafecast.Slice[int32](src) bitpack.PackInt32(dst, bits, bitWidth) return bitpack.ByteCount(uint(len(src)*8) * bitWidth) } diff --git a/vendor/github.com/segmentio/encoding/LICENSE b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/LICENSE similarity index 100% rename from vendor/github.com/segmentio/encoding/LICENSE rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/LICENSE diff --git a/vendor/github.com/segmentio/encoding/thrift/binary.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go similarity index 98% rename from vendor/github.com/segmentio/encoding/thrift/binary.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go index 18d95d9abf9..73f15b03afb 100644 --- a/vendor/github.com/segmentio/encoding/thrift/binary.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go @@ -7,6 +7,8 @@ import ( "fmt" "io" "math" + + "github.com/parquet-go/parquet-go/internal/unsafecast" ) // 
BinaryProtocol is a Protocol implementation for the binary thrift protocol. @@ -96,7 +98,7 @@ func (r *binaryReader) ReadBytes() ([]byte, error) { func (r *binaryReader) ReadString() (string, error) { b, err := r.ReadBytes() - return unsafeBytesToString(b), err + return unsafecast.String(b), err } func (r *binaryReader) ReadLength() (int, error) { @@ -126,7 +128,7 @@ func (r *binaryReader) ReadMessage() (Message, error) { if err != nil { return m, dontExpectEOF(err) } - m.Name = unsafeBytesToString(s) + m.Name = unsafecast.String(s) t, err := r.ReadInt8() if err != nil { diff --git a/vendor/github.com/segmentio/encoding/thrift/compact.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go similarity index 98% rename from vendor/github.com/segmentio/encoding/thrift/compact.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go index 6a286572a73..7bca5771deb 100644 --- a/vendor/github.com/segmentio/encoding/thrift/compact.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go @@ -7,6 +7,8 @@ import ( "fmt" "io" "math" + + "github.com/parquet-go/parquet-go/internal/unsafecast" ) // CompactProtocol is a Protocol implementation for the compact thrift protocol. 
@@ -77,7 +79,7 @@ func (r *compactReader) ReadBytes() ([]byte, error) { func (r *compactReader) ReadString() (string, error) { b, err := r.ReadBytes() - return unsafeBytesToString(b), err + return unsafecast.String(b), err } func (r *compactReader) ReadLength() (int, error) { diff --git a/vendor/github.com/segmentio/encoding/thrift/debug.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/debug.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/debug.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/debug.go diff --git a/vendor/github.com/segmentio/encoding/thrift/decode.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/decode.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/decode.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/decode.go diff --git a/vendor/github.com/segmentio/encoding/thrift/encode.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/encode.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/encode.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/encode.go diff --git a/vendor/github.com/segmentio/encoding/thrift/error.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/error.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/error.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/error.go diff --git a/vendor/github.com/segmentio/encoding/thrift/protocol.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/protocol.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/protocol.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/protocol.go diff --git a/vendor/github.com/segmentio/encoding/thrift/struct.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/struct.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/struct.go 
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/struct.go diff --git a/vendor/github.com/segmentio/encoding/thrift/thrift.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/thrift.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/thrift.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/thrift.go diff --git a/vendor/github.com/segmentio/encoding/thrift/unsafe.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go similarity index 85% rename from vendor/github.com/segmentio/encoding/thrift/unsafe.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go index 9572b40ef0a..b27c6489d8d 100644 --- a/vendor/github.com/segmentio/encoding/thrift/unsafe.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go @@ -18,7 +18,3 @@ func makeTypeID(t reflect.Type) typeID { ptr: (*[2]unsafe.Pointer)(unsafe.Pointer(&t))[1], } } - -func unsafeBytesToString(b []byte) string { - return *(*string)(unsafe.Pointer(&b)) -} diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/values.go b/vendor/github.com/parquet-go/parquet-go/encoding/values.go index a53a7b9f896..41ab0a23e35 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/values.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/values.go @@ -82,27 +82,27 @@ func (v *Values) Boolean() []byte { func (v *Values) Int32() []int32 { v.assertKind(Int32) - return unsafecast.BytesToInt32(v.data) + return unsafecast.Slice[int32](v.data) } func (v *Values) Int64() []int64 { v.assertKind(Int64) - return unsafecast.BytesToInt64(v.data) + return unsafecast.Slice[int64](v.data) } func (v *Values) Int96() []deprecated.Int96 { v.assertKind(Int96) - return deprecated.BytesToInt96(v.data) + return unsafecast.Slice[deprecated.Int96](v.data) } func (v *Values) Float() []float32 { v.assertKind(Float) - return unsafecast.BytesToFloat32(v.data) + return unsafecast.Slice[float32](v.data) } func (v 
*Values) Double() []float64 { v.assertKind(Double) - return unsafecast.BytesToFloat64(v.data) + return unsafecast.Slice[float64](v.data) } func (v *Values) ByteArray() (data []byte, offsets []uint32) { @@ -117,123 +117,86 @@ func (v *Values) FixedLenByteArray() (data []byte, size int) { func (v *Values) Uint32() []uint32 { v.assertKind(Int32) - return unsafecast.BytesToUint32(v.data) + return unsafecast.Slice[uint32](v.data) } func (v *Values) Uint64() []uint64 { v.assertKind(Int64) - return unsafecast.BytesToUint64(v.data) + return unsafecast.Slice[uint64](v.data) } func (v *Values) Uint128() [][16]byte { v.assertKind(FixedLenByteArray) v.assertSize(16) - return unsafecast.BytesToUint128(v.data) + return unsafecast.Slice[[16]byte](v.data) +} + +func makeValues[T any](kind Kind, values []T) Values { + return Values{kind: kind, data: unsafecast.Slice[byte](values)} } func BooleanValues(values []byte) Values { - return Values{ - kind: Boolean, - data: values, - } + return makeValues(Boolean, values) } func Int32Values(values []int32) Values { - return Values{ - kind: Int32, - data: unsafecast.Int32ToBytes(values), - } + return makeValues(Int32, values) } func Int64Values(values []int64) Values { - return Values{ - kind: Int64, - data: unsafecast.Int64ToBytes(values), - } + return makeValues(Int64, values) } func Int96Values(values []deprecated.Int96) Values { - return Values{ - kind: Int96, - data: deprecated.Int96ToBytes(values), - } + return makeValues(Int96, values) } func FloatValues(values []float32) Values { - return Values{ - kind: Float, - data: unsafecast.Float32ToBytes(values), - } + return makeValues(Float, values) } func DoubleValues(values []float64) Values { - return Values{ - kind: Double, - data: unsafecast.Float64ToBytes(values), - } + return makeValues(Double, values) } func ByteArrayValues(values []byte, offsets []uint32) Values { - return Values{ - kind: ByteArray, - data: values, - offsets: offsets, - } + return Values{kind: ByteArray, data: 
values, offsets: offsets} } func FixedLenByteArrayValues(values []byte, size int) Values { - return Values{ - kind: FixedLenByteArray, - size: int32(size), - data: values, - } + return Values{kind: FixedLenByteArray, size: int32(size), data: values} } func Uint32Values(values []uint32) Values { - return Int32Values(unsafecast.Uint32ToInt32(values)) + return Int32Values(unsafecast.Slice[int32](values)) } func Uint64Values(values []uint64) Values { - return Int64Values(unsafecast.Uint64ToInt64(values)) + return Int64Values(unsafecast.Slice[int64](values)) } func Uint128Values(values [][16]byte) Values { - return FixedLenByteArrayValues(unsafecast.Uint128ToBytes(values), 16) + return FixedLenByteArrayValues(unsafecast.Slice[byte](values), 16) } func Int32ValuesFromBytes(values []byte) Values { - return Values{ - kind: Int32, - data: values, - } + return Values{kind: Int32, data: values} } func Int64ValuesFromBytes(values []byte) Values { - return Values{ - kind: Int64, - data: values, - } + return Values{kind: Int64, data: values} } func Int96ValuesFromBytes(values []byte) Values { - return Values{ - kind: Int96, - data: values, - } + return Values{kind: Int96, data: values} } func FloatValuesFromBytes(values []byte) Values { - return Values{ - kind: Float, - data: values, - } + return Values{kind: Float, data: values} } func DoubleValuesFromBytes(values []byte) Values { - return Values{ - kind: Double, - data: values, - } + return Values{kind: Double, data: values} } func EncodeBoolean(dst []byte, src Values, enc Encoding) ([]byte, error) { diff --git a/vendor/github.com/parquet-go/parquet-go/file.go b/vendor/github.com/parquet-go/parquet-go/file.go index 791eb51eece..384042b308c 100644 --- a/vendor/github.com/parquet-go/parquet-go/file.go +++ b/vendor/github.com/parquet-go/parquet-go/file.go @@ -9,9 +9,9 @@ import ( "sort" "strings" "sync" + "sync/atomic" - "github.com/segmentio/encoding/thrift" - + "github.com/parquet-go/parquet-go/encoding/thrift" 
"github.com/parquet-go/parquet-go/format" ) @@ -391,8 +391,8 @@ func (g *fileRowGroup) init(file *File, schema *Schema, columns []*Column, rowGr if file.hasIndexes() { j := (int(rowGroup.Ordinal) * len(columns)) + i - fileColumnChunks[i].columnIndex = &file.columnIndexes[j] - fileColumnChunks[i].offsetIndex = &file.offsetIndexes[j] + fileColumnChunks[i].columnIndex.Store(&file.columnIndexes[j]) + fileColumnChunks[i].offsetIndex.Store(&file.offsetIndexes[j]) } g.columns[i] = &fileColumnChunks[i] @@ -442,8 +442,8 @@ type fileColumnChunk struct { column *Column bloomFilter *bloomFilter rowGroup *format.RowGroup - columnIndex *format.ColumnIndex - offsetIndex *format.OffsetIndex + columnIndex atomic.Pointer[format.ColumnIndex] + offsetIndex atomic.Pointer[format.OffsetIndex] chunk *format.ColumnChunk } @@ -462,23 +462,25 @@ func (c *fileColumnChunk) Pages() Pages { } func (c *fileColumnChunk) ColumnIndex() (ColumnIndex, error) { - if err := c.readColumnIndex(); err != nil { + index, err := c.readColumnIndex() + if err != nil { return nil, err } - if c.columnIndex == nil || c.chunk.ColumnIndexOffset == 0 { + if index == nil || c.chunk.ColumnIndexOffset == 0 { return nil, ErrMissingColumnIndex } return fileColumnIndex{c}, nil } func (c *fileColumnChunk) OffsetIndex() (OffsetIndex, error) { - if err := c.readOffsetIndex(); err != nil { + index, err := c.readOffsetIndex() + if err != nil { return nil, err } - if c.offsetIndex == nil || c.chunk.OffsetIndexOffset == 0 { + if index == nil || c.chunk.OffsetIndexOffset == 0 { return nil, ErrMissingOffsetIndex } - return (*fileOffsetIndex)(c.offsetIndex), nil + return (*fileOffsetIndex)(index), nil } func (c *fileColumnChunk) BloomFilter() BloomFilter { @@ -492,48 +494,59 @@ func (c *fileColumnChunk) NumValues() int64 { return c.chunk.MetaData.NumValues } -func (c *fileColumnChunk) readColumnIndex() error { - if c.columnIndex != nil { - return nil +func (c *fileColumnChunk) readColumnIndex() (*format.ColumnIndex, error) { + if 
index := c.columnIndex.Load(); index != nil { + return index, nil } chunkMeta := c.file.metadata.RowGroups[c.rowGroup.Ordinal].Columns[c.Column()] offset, length := chunkMeta.ColumnIndexOffset, chunkMeta.ColumnIndexLength if offset == 0 { - return nil + return nil, nil } indexData := make([]byte, int(length)) var columnIndex format.ColumnIndex if _, err := readAt(c.file.reader, indexData, offset); err != nil { - return fmt.Errorf("read %d bytes column index at offset %d: %w", length, offset, err) + return nil, fmt.Errorf("read %d bytes column index at offset %d: %w", length, offset, err) } if err := thrift.Unmarshal(&c.file.protocol, indexData, &columnIndex); err != nil { - return fmt.Errorf("decode column index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err) + return nil, fmt.Errorf("decode column index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err) } - c.columnIndex = &columnIndex - return nil + index := &columnIndex + // We do a CAS (and Load on CAS failure) instead of a simple Store for + // the nice property that concurrent calling goroutines will only ever + // observe a single pointer value for the result. 
+ if !c.columnIndex.CompareAndSwap(nil, index) { + // another goroutine populated it since we last read the pointer + return c.columnIndex.Load(), nil + } + return index, nil } -func (c *fileColumnChunk) readOffsetIndex() error { - if c.offsetIndex != nil { - return nil +func (c *fileColumnChunk) readOffsetIndex() (*format.OffsetIndex, error) { + if index := c.offsetIndex.Load(); index != nil { + return index, nil } chunkMeta := c.file.metadata.RowGroups[c.rowGroup.Ordinal].Columns[c.Column()] offset, length := chunkMeta.OffsetIndexOffset, chunkMeta.OffsetIndexLength if offset == 0 { - return nil + return nil, nil } indexData := make([]byte, int(length)) var offsetIndex format.OffsetIndex if _, err := readAt(c.file.reader, indexData, offset); err != nil { - return fmt.Errorf("read %d bytes offset index at offset %d: %w", length, offset, err) + return nil, fmt.Errorf("read %d bytes offset index at offset %d: %w", length, offset, err) } if err := thrift.Unmarshal(&c.file.protocol, indexData, &offsetIndex); err != nil { - return fmt.Errorf("decode offset index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err) + return nil, fmt.Errorf("decode offset index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err) } - c.offsetIndex = &offsetIndex - return nil + index := &offsetIndex + if !c.offsetIndex.CompareAndSwap(nil, index) { + // another goroutine populated it since we last read the pointer + return c.offsetIndex.Load(), nil + } + return index, nil } type filePages struct { @@ -745,7 +758,7 @@ func (f *filePages) SeekToRow(rowIndex int64) (err error) { if f.chunk == nil { return io.ErrClosedPipe } - if f.chunk.offsetIndex == nil { + if index := f.chunk.offsetIndex.Load(); index == nil { _, err = f.section.Seek(f.dataOffset-f.baseOffset, io.SeekStart) f.skip = rowIndex f.index = 0 @@ -753,7 +766,7 @@ func (f *filePages) SeekToRow(rowIndex int64) (err error) { f.index = 1 } } 
else { - pages := f.chunk.offsetIndex.PageLocations + pages := index.PageLocations index := sort.Search(len(pages), func(i int) bool { return pages[i].FirstRowIndex > rowIndex }) - 1 diff --git a/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go b/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go index a95e9e96be9..0a1686f17b6 100644 --- a/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go +++ b/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go @@ -98,7 +98,7 @@ func (t *Int32Table) Len() int { return t.len } func (t *Int32Table) Cap() int { return t.size() } func (t *Int32Table) Probe(keys, values []int32) int { - return t.probe(unsafecast.Int32ToUint32(keys), values) + return t.probe(unsafecast.Slice[uint32](keys), values) } func (t *Int32Table) ProbeArray(keys sparse.Int32Array, values []int32) int { @@ -118,7 +118,7 @@ func (t *Float32Table) Len() int { return t.len } func (t *Float32Table) Cap() int { return t.size() } func (t *Float32Table) Probe(keys []float32, values []int32) int { - return t.probe(unsafecast.Float32ToUint32(keys), values) + return t.probe(unsafecast.Slice[uint32](keys), values) } func (t *Float32Table) ProbeArray(keys sparse.Float32Array, values []int32) int { @@ -342,7 +342,7 @@ func (t *Int64Table) Len() int { return t.len } func (t *Int64Table) Cap() int { return t.size() } func (t *Int64Table) Probe(keys []int64, values []int32) int { - return t.probe(unsafecast.Int64ToUint64(keys), values) + return t.probe(unsafecast.Slice[uint64](keys), values) } func (t *Int64Table) ProbeArray(keys sparse.Int64Array, values []int32) int { @@ -362,7 +362,7 @@ func (t *Float64Table) Len() int { return t.len } func (t *Float64Table) Cap() int { return t.size() } func (t *Float64Table) Probe(keys []float64, values []int32) int { - return t.probe(unsafecast.Float64ToUint64(keys), values) + return t.probe(unsafecast.Slice[uint64](keys), values) } func (t *Float64Table) ProbeArray(keys sparse.Float64Array, 
values []int32) int { @@ -639,7 +639,7 @@ func (t *table128) init(cap int, maxLoad float64) { func (t *table128) kv() (keys [][16]byte, values []int32) { i := t.cap * 16 - return unsafecast.BytesToUint128(t.table[:i]), unsafecast.BytesToInt32(t.table[i:]) + return unsafecast.Slice[[16]byte](t.table[:i]), unsafecast.Slice[int32](t.table[i:]) } func (t *table128) grow(totalValues int) { @@ -753,8 +753,8 @@ func (t *table128) probeArray(keys sparse.Uint128Array, values []int32) int { func multiProbe128Default(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int { modulo := uintptr(tableCap) - 1 offset := uintptr(tableCap) * 16 - tableKeys := unsafecast.BytesToUint128(table[:offset]) - tableValues := unsafecast.BytesToInt32(table[offset:]) + tableKeys := unsafecast.Slice[[16]byte](table[:offset]) + tableValues := unsafecast.Slice[int32](table[offset:]) for i, hash := range hashes { key := keys.Index(i) diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go index fd92c18380d..f3932223919 100644 --- a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go @@ -29,7 +29,7 @@ func unpackInt32(dst []int32, src []byte, bitWidth uint) { case hasAVX2 && bitWidth <= 31: unpackInt32x27to31bitsAVX2(dst, src, bitWidth) case bitWidth == 32: - copy(dst, unsafecast.BytesToInt32(src)) + copy(dst, unsafecast.Slice[int32](src)) default: unpackInt32Default(dst, src, bitWidth) } diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go index b7d46ba9536..cddbd773a51 100644 --- a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go +++ 
b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go @@ -3,11 +3,27 @@ package bitpack import ( + "encoding/binary" + + "golang.org/x/sys/cpu" + "github.com/parquet-go/parquet-go/internal/unsafecast" ) func unpackInt32(dst []int32, src []byte, bitWidth uint) { - bits := unsafecast.BytesToUint32(src) + var bits []uint32 + if cpu.IsBigEndian { + srcLen := (len(src) / 4) + bits = make([]uint32, srcLen) + idx := 0 + for k := range srcLen { + bits[k] = binary.LittleEndian.Uint32((src)[idx:(4 + idx)]) + idx += 4 + } + } else { + bits = unsafecast.Slice[uint32](src) + } + bitMask := uint32(1<= 0 { + if i := bytes.IndexByte(unsafecast.Slice[byte](data), 0); i >= 0 { return i } return len(data) } func streakOfFalse(data []bool) int { - if i := bytes.IndexByte(unsafecast.BoolToBytes(data), 1); i >= 0 { + if i := bytes.IndexByte(unsafecast.Slice[byte](data), 1); i >= 0 { return i } return len(data) diff --git a/vendor/github.com/parquet-go/parquet-go/order_purego.go b/vendor/github.com/parquet-go/parquet-go/order_purego.go index 2011455e152..44c4d7905e3 100644 --- a/vendor/github.com/parquet-go/parquet-go/order_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/order_purego.go @@ -2,174 +2,28 @@ package parquet -// ----------------------------------------------------------------------------- -// TODO: use generics versions of the these functions to reduce the amount of -// code to maintain when we drop compatilibty with Go version older than 1.18. 
-// ----------------------------------------------------------------------------- +import "cmp" -func orderOfInt32(data []int32) int { - if len(data) > 1 { - if int32AreInAscendingOrder(data) { - return +1 - } - if int32AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} - -func orderOfInt64(data []int64) int { - if len(data) > 1 { - if int64AreInAscendingOrder(data) { - return +1 - } - if int64AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} - -func orderOfUint32(data []uint32) int { - if len(data) > 1 { - if uint32AreInAscendingOrder(data) { - return +1 - } - if uint32AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} - -func orderOfUint64(data []uint64) int { - if len(data) > 1 { - if uint64AreInAscendingOrder(data) { - return +1 - } - if uint64AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} - -func orderOfFloat32(data []float32) int { - if len(data) > 1 { - if float32AreInAscendingOrder(data) { - return +1 - } - if float32AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} +func orderOfInt32(data []int32) int { return orderOf(data) } +func orderOfInt64(data []int64) int { return orderOf(data) } +func orderOfUint32(data []uint32) int { return orderOf(data) } +func orderOfUint64(data []uint64) int { return orderOf(data) } +func orderOfFloat32(data []float32) int { return orderOf(data) } +func orderOfFloat64(data []float64) int { return orderOf(data) } -func orderOfFloat64(data []float64) int { +func orderOf[T cmp.Ordered](data []T) int { if len(data) > 1 { - if float64AreInAscendingOrder(data) { + if orderIsAscending(data) { return +1 } - if float64AreInDescendingOrder(data) { + if orderIsDescending(data) { return -1 } } return 0 } -func int32AreInAscendingOrder(data []int32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func int32AreInDescendingOrder(data []int32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < 
data[i] { - return false - } - } - return true -} - -func int64AreInAscendingOrder(data []int64) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func int64AreInDescendingOrder(data []int64) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func uint32AreInAscendingOrder(data []uint32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func uint32AreInDescendingOrder(data []uint32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func uint64AreInAscendingOrder(data []uint64) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func uint64AreInDescendingOrder(data []uint64) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func float32AreInAscendingOrder(data []float32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func float32AreInDescendingOrder(data []float32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func float64AreInAscendingOrder(data []float64) bool { +func orderIsAscending[T cmp.Ordered](data []T) bool { for i := len(data) - 1; i > 0; i-- { if data[i-1] > data[i] { return false @@ -178,7 +32,7 @@ func float64AreInAscendingOrder(data []float64) bool { return true } -func float64AreInDescendingOrder(data []float64) bool { +func orderIsDescending[T cmp.Ordered](data []T) bool { for i := len(data) - 1; i > 0; i-- { if data[i-1] < data[i] { return false diff --git a/vendor/github.com/parquet-go/parquet-go/page_values.go b/vendor/github.com/parquet-go/parquet-go/page_values.go index 964220b4ebb..ecbdffb0c8d 100644 --- 
a/vendor/github.com/parquet-go/parquet-go/page_values.go +++ b/vendor/github.com/parquet-go/parquet-go/page_values.go @@ -149,7 +149,7 @@ type int32PageValues struct { } func (r *int32PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadInt32s(unsafecast.BytesToInt32(b)) + n, err = r.ReadInt32s(unsafecast.Slice[int32](b)) return 4 * n, err } @@ -180,7 +180,7 @@ type int64PageValues struct { } func (r *int64PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadInt64s(unsafecast.BytesToInt64(b)) + n, err = r.ReadInt64s(unsafecast.Slice[int64](b)) return 8 * n, err } @@ -211,7 +211,7 @@ type int96PageValues struct { } func (r *int96PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadInt96s(deprecated.BytesToInt96(b)) + n, err = r.ReadInt96s(unsafecast.Slice[deprecated.Int96](b)) return 12 * n, err } @@ -242,7 +242,7 @@ type floatPageValues struct { } func (r *floatPageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadFloats(unsafecast.BytesToFloat32(b)) + n, err = r.ReadFloats(unsafecast.Slice[float32](b)) return 4 * n, err } @@ -273,7 +273,7 @@ type doublePageValues struct { } func (r *doublePageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadDoubles(unsafecast.BytesToFloat64(b)) + n, err = r.ReadDoubles(unsafecast.Slice[float64](b)) return 8 * n, err } @@ -395,7 +395,7 @@ type uint32PageValues struct { } func (r *uint32PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadUint32s(unsafecast.BytesToUint32(b)) + n, err = r.ReadUint32s(unsafecast.Slice[uint32](b)) return 4 * n, err } @@ -426,7 +426,7 @@ type uint64PageValues struct { } func (r *uint64PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadUint64s(unsafecast.BytesToUint64(b)) + n, err = r.ReadUint64s(unsafecast.Slice[uint64](b)) return 8 * n, err } diff --git a/vendor/github.com/parquet-go/parquet-go/sparse/array.go b/vendor/github.com/parquet-go/parquet-go/sparse/array.go index 94285becb8b..fecfb4dc4d7 100644 --- 
a/vendor/github.com/parquet-go/parquet-go/sparse/array.go +++ b/vendor/github.com/parquet-go/parquet-go/sparse/array.go @@ -8,7 +8,7 @@ import ( type Array struct{ array } func UnsafeArray(base unsafe.Pointer, length int, offset uintptr) Array { - return Array{makeArray(base, uintptr(length), offset)} + return Array{unsafeArray(base, length, offset)} } func (a Array) Len() int { return int(a.len) } @@ -36,8 +36,17 @@ type array struct { off uintptr } -func makeArray(base unsafe.Pointer, length, offset uintptr) array { - return array{ptr: base, len: length, off: offset} +func makeArray[T any](base []T) array { + var z T + return array{ + ptr: unsafe.Pointer(unsafe.SliceData(base)), + len: uintptr(len(base)), + off: unsafe.Sizeof(z), + } +} + +func unsafeArray(base unsafe.Pointer, length int, offset uintptr) array { + return array{ptr: base, len: uintptr(length), off: offset} } func (a array) index(i int) unsafe.Pointer { @@ -72,11 +81,11 @@ func (a array) offset(off uintptr) array { type BoolArray struct{ array } func MakeBoolArray(values []bool) BoolArray { - return BoolArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 1)} + return BoolArray{makeArray(values)} } func UnsafeBoolArray(base unsafe.Pointer, length int, offset uintptr) BoolArray { - return BoolArray{makeArray(base, uintptr(length), offset)} + return BoolArray{unsafeArray(base, length, offset)} } func (a BoolArray) Len() int { return int(a.len) } @@ -88,11 +97,11 @@ func (a BoolArray) UnsafeArray() Array { return Array{a.array} } type Int8Array struct{ array } func MakeInt8Array(values []int8) Int8Array { - return Int8Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Int8Array{makeArray(values)} } func UnsafeInt8Array(base unsafe.Pointer, length int, offset uintptr) Int8Array { - return Int8Array{makeArray(base, uintptr(length), offset)} + return Int8Array{unsafeArray(base, length, offset)} } func (a Int8Array) Len() int { 
return int(a.len) } @@ -104,11 +113,11 @@ func (a Int8Array) UnsafeArray() Array { return Array{a.array} } type Int16Array struct{ array } func MakeInt16Array(values []int16) Int16Array { - return Int16Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Int16Array{makeArray(values)} } func UnsafeInt16Array(base unsafe.Pointer, length int, offset uintptr) Int16Array { - return Int16Array{makeArray(base, uintptr(length), offset)} + return Int16Array{unsafeArray(base, length, offset)} } func (a Int16Array) Len() int { return int(a.len) } @@ -122,11 +131,11 @@ func (a Int16Array) UnsafeArray() Array { return Array{a.array} } type Int32Array struct{ array } func MakeInt32Array(values []int32) Int32Array { - return Int32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} + return Int32Array{makeArray(values)} } func UnsafeInt32Array(base unsafe.Pointer, length int, offset uintptr) Int32Array { - return Int32Array{makeArray(base, uintptr(length), offset)} + return Int32Array{unsafeArray(base, length, offset)} } func (a Int32Array) Len() int { return int(a.len) } @@ -142,11 +151,11 @@ func (a Int32Array) UnsafeArray() Array { return Array{a.array} } type Int64Array struct{ array } func MakeInt64Array(values []int64) Int64Array { - return Int64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Int64Array{makeArray(values)} } func UnsafeInt64Array(base unsafe.Pointer, length int, offset uintptr) Int64Array { - return Int64Array{makeArray(base, uintptr(length), offset)} + return Int64Array{unsafeArray(base, length, offset)} } func (a Int64Array) Len() int { return int(a.len) } @@ -164,11 +173,11 @@ func (a Int64Array) UnsafeArray() Array { return Array{a.array} } type Float32Array struct{ array } func MakeFloat32Array(values []float32) Float32Array { - return Float32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} + 
return Float32Array{makeArray(values)} } func UnsafeFloat32Array(base unsafe.Pointer, length int, offset uintptr) Float32Array { - return Float32Array{makeArray(base, uintptr(length), offset)} + return Float32Array{unsafeArray(base, length, offset)} } func (a Float32Array) Len() int { return int(a.len) } @@ -181,11 +190,11 @@ func (a Float32Array) UnsafeArray() Array { return Array{a.array} } type Float64Array struct{ array } func MakeFloat64Array(values []float64) Float64Array { - return Float64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Float64Array{makeArray(values)} } func UnsafeFloat64Array(base unsafe.Pointer, length int, offset uintptr) Float64Array { - return Float64Array{makeArray(base, uintptr(length), offset)} + return Float64Array{unsafeArray(base, length, offset)} } func (a Float64Array) Len() int { return int(a.len) } @@ -197,11 +206,11 @@ func (a Float64Array) UnsafeArray() Array { return Array{a.array} } type Uint8Array struct{ array } func MakeUint8Array(values []uint8) Uint8Array { - return Uint8Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Uint8Array{makeArray(values)} } func UnsafeUint8Array(base unsafe.Pointer, length int, offset uintptr) Uint8Array { - return Uint8Array{makeArray(base, uintptr(length), offset)} + return Uint8Array{unsafeArray(base, length, offset)} } func (a Uint8Array) Len() int { return int(a.len) } @@ -212,11 +221,11 @@ func (a Uint8Array) UnsafeArray() Array { return Array{a.array} } type Uint16Array struct{ array } func MakeUint16Array(values []uint16) Uint16Array { - return Uint16Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Uint16Array{makeArray(values)} } func UnsafeUint16Array(base unsafe.Pointer, length int, offset uintptr) Uint16Array { - return Uint16Array{makeArray(base, uintptr(length), offset)} + return Uint16Array{unsafeArray(base, length, offset)} } func (a 
Uint16Array) Len() int { return int(a.len) } @@ -228,11 +237,11 @@ func (a Uint16Array) UnsafeArray() Array { return Array{a.array} } type Uint32Array struct{ array } func MakeUint32Array(values []uint32) Uint32Array { - return Uint32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} + return Uint32Array{makeArray(values)} } func UnsafeUint32Array(base unsafe.Pointer, length int, offset uintptr) Uint32Array { - return Uint32Array{makeArray(base, uintptr(length), offset)} + return Uint32Array{unsafeArray(base, length, offset)} } func (a Uint32Array) Len() int { return int(a.len) } @@ -245,11 +254,11 @@ func (a Uint32Array) UnsafeArray() Array { return Array{a.array} } type Uint64Array struct{ array } func MakeUint64Array(values []uint64) Uint64Array { - return Uint64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Uint64Array{makeArray(values)} } func UnsafeUint64Array(base unsafe.Pointer, length int, offset uintptr) Uint64Array { - return Uint64Array{makeArray(base, uintptr(length), offset)} + return Uint64Array{unsafeArray(base, length, offset)} } func (a Uint64Array) Len() int { return int(a.len) } @@ -263,11 +272,11 @@ func (a Uint64Array) UnsafeArray() Array { return Array{a.array} } type Uint128Array struct{ array } func MakeUint128Array(values [][16]byte) Uint128Array { - return Uint128Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 16)} + return Uint128Array{makeArray(values)} } func UnsafeUint128Array(base unsafe.Pointer, length int, offset uintptr) Uint128Array { - return Uint128Array{makeArray(base, uintptr(length), offset)} + return Uint128Array{unsafeArray(base, length, offset)} } func (a Uint128Array) Len() int { return int(a.len) } @@ -283,11 +292,11 @@ type StringArray struct{ array } func MakeStringArray(values []string) StringArray { const sizeOfString = unsafe.Sizeof("") - return 
StringArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), sizeOfString)} + return StringArray{makeArray(values)} } func UnsafeStringArray(base unsafe.Pointer, length int, offset uintptr) StringArray { - return StringArray{makeArray(base, uintptr(length), offset)} + return StringArray{unsafeArray(base, length, offset)} } func (a StringArray) Len() int { return int(a.len) } @@ -298,12 +307,11 @@ func (a StringArray) UnsafeArray() Array { return Array{a.array} } type TimeArray struct{ array } func MakeTimeArray(values []time.Time) TimeArray { - const sizeOfTime = unsafe.Sizeof(time.Time{}) - return TimeArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), sizeOfTime)} + return TimeArray{makeArray(values)} } func UnsafeTimeArray(base unsafe.Pointer, length int, offset uintptr) TimeArray { - return TimeArray{makeArray(base, uintptr(length), offset)} + return TimeArray{unsafeArray(base, length, offset)} } func (a TimeArray) Len() int { return int(a.len) } diff --git a/vendor/github.com/parquet-go/parquet-go/sparse/gather.go b/vendor/github.com/parquet-go/parquet-go/sparse/gather.go index 746a0930d1c..d7d72d091bf 100644 --- a/vendor/github.com/parquet-go/parquet-go/sparse/gather.go +++ b/vendor/github.com/parquet-go/parquet-go/sparse/gather.go @@ -1,21 +1,21 @@ package sparse -import "unsafe" +import "github.com/parquet-go/parquet-go/internal/unsafecast" func GatherInt32(dst []int32, src Int32Array) int { - return GatherUint32(*(*[]uint32)(unsafe.Pointer(&dst)), src.Uint32Array()) + return GatherUint32(unsafecast.Slice[uint32](dst), src.Uint32Array()) } func GatherInt64(dst []int64, src Int64Array) int { - return GatherUint64(*(*[]uint64)(unsafe.Pointer(&dst)), src.Uint64Array()) + return GatherUint64(unsafecast.Slice[uint64](dst), src.Uint64Array()) } func GatherFloat32(dst []float32, src Float32Array) int { - return GatherUint32(*(*[]uint32)(unsafe.Pointer(&dst)), src.Uint32Array()) + return 
GatherUint32(unsafecast.Slice[uint32](dst), src.Uint32Array()) } func GatherFloat64(dst []float64, src Float64Array) int { - return GatherUint64(*(*[]uint64)(unsafe.Pointer(&dst)), src.Uint64Array()) + return GatherUint64(unsafecast.Slice[uint64](dst), src.Uint64Array()) } func GatherBits(dst []byte, src Uint8Array) int { return gatherBits(dst, src) } diff --git a/vendor/github.com/parquet-go/parquet-go/type.go b/vendor/github.com/parquet-go/parquet-go/type.go index 59be12ab3a6..f5690a22e2b 100644 --- a/vendor/github.com/parquet-go/parquet-go/type.go +++ b/vendor/github.com/parquet-go/parquet-go/type.go @@ -12,7 +12,6 @@ import ( "github.com/parquet-go/parquet-go/deprecated" "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" - "github.com/parquet-go/parquet-go/internal/unsafecast" ) // Kind is an enumeration type representing the physical types supported by the @@ -901,7 +900,7 @@ func (t fixedLenByteArrayType) AssignValue(dst reflect.Value, src Value) error { // overhead we instead convert the reflect.Value holding the // destination array into a byte slice which allows us to use // a more efficient call to copy. 
- d := unsafe.Slice((*byte)(unsafecast.PointerOfValue(dst)), len(v)) + d := unsafe.Slice((*byte)(reflectValueData(dst)), len(v)) copy(d, v) return nil } @@ -915,6 +914,10 @@ func (t fixedLenByteArrayType) AssignValue(dst reflect.Value, src Value) error { return nil } +func reflectValueData(v reflect.Value) unsafe.Pointer { + return (*[2]unsafe.Pointer)(unsafe.Pointer(&v))[1] +} + func (t fixedLenByteArrayType) ConvertValue(val Value, typ Type) (Value, error) { switch typ.(type) { case *stringType: diff --git a/vendor/github.com/parquet-go/parquet-go/value.go b/vendor/github.com/parquet-go/parquet-go/value.go index a48f4344bc7..a85448742fd 100644 --- a/vendor/github.com/parquet-go/parquet-go/value.go +++ b/vendor/github.com/parquet-go/parquet-go/value.go @@ -14,7 +14,6 @@ import ( "github.com/google/uuid" "github.com/parquet-go/parquet-go/deprecated" "github.com/parquet-go/parquet-go/format" - "github.com/parquet-go/parquet-go/internal/unsafecast" ) const ( @@ -419,11 +418,11 @@ func makeValueDouble(value float64) Value { } func makeValueBytes(kind Kind, value []byte) Value { - return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value)) + return makeValueByteArray(kind, unsafe.SliceData(value), len(value)) } func makeValueString(kind Kind, value string) Value { - return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value)) + return makeValueByteArray(kind, unsafe.StringData(value), len(value)) } func makeValueFixedLenByteArray(v reflect.Value) Value { @@ -463,8 +462,8 @@ func (v *Value) float() float32 { return math.Float32frombits(uint32(v. 
func (v *Value) double() float64 { return math.Float64frombits(uint64(v.u64)) } func (v *Value) uint32() uint32 { return uint32(v.u64) } func (v *Value) uint64() uint64 { return v.u64 } -func (v *Value) byteArray() []byte { return unsafecast.Bytes(v.ptr, int(v.u64)) } -func (v *Value) string() string { return unsafecast.BytesToString(v.byteArray()) } +func (v *Value) byteArray() []byte { return unsafe.Slice(v.ptr, v.u64) } +func (v *Value) string() string { return unsafe.String(v.ptr, v.u64) } func (v *Value) be128() *[16]byte { return (*[16]byte)(unsafe.Pointer(v.ptr)) } func (v *Value) column() int { return int(^v.columnIndex) } @@ -516,14 +515,14 @@ func (v Value) convertToDouble(x float64) Value { func (v Value) convertToByteArray(x []byte) Value { v.kind = ^int8(ByteArray) - v.ptr = unsafecast.AddressOfBytes(x) + v.ptr = unsafe.SliceData(x) v.u64 = uint64(len(x)) return v } func (v Value) convertToFixedLenByteArray(x []byte) Value { v.kind = ^int8(FixedLenByteArray) - v.ptr = unsafecast.AddressOfBytes(x) + v.ptr = unsafe.SliceData(x) v.u64 = uint64(len(x)) return v } @@ -787,7 +786,7 @@ func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value { func (v Value) Clone() Value { switch k := v.Kind(); k { case ByteArray, FixedLenByteArray: - v.ptr = unsafecast.AddressOfBytes(copyBytes(v.byteArray())) + v.ptr = unsafe.SliceData(copyBytes(v.byteArray())) } return v } diff --git a/vendor/github.com/parquet-go/parquet-go/writer.go b/vendor/github.com/parquet-go/parquet-go/writer.go index c02f4e29598..8d9e44c7ee2 100644 --- a/vendor/github.com/parquet-go/parquet-go/writer.go +++ b/vendor/github.com/parquet-go/parquet-go/writer.go @@ -17,8 +17,8 @@ import ( "github.com/parquet-go/parquet-go/compress" "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/encoding/plain" + "github.com/parquet-go/parquet-go/encoding/thrift" "github.com/parquet-go/parquet-go/format" - "github.com/segmentio/encoding/thrift" ) const ( diff --git 
a/vendor/go.opentelemetry.io/otel/.golangci.yml b/vendor/go.opentelemetry.io/otel/.golangci.yml index a5f904197fe..d09555506f7 100644 --- a/vendor/go.opentelemetry.io/otel/.golangci.yml +++ b/vendor/go.opentelemetry.io/otel/.golangci.yml @@ -25,6 +25,7 @@ linters: - revive - staticcheck - tenv + - testifylint - typecheck - unconvert - unused @@ -302,3 +303,9 @@ linters-settings: # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md#waitgroup-by-value - name: waitgroup-by-value disabled: false + testifylint: + enable-all: true + disable: + - float-compare + - go-require + - require-error diff --git a/vendor/go.opentelemetry.io/otel/CHANGELOG.md b/vendor/go.opentelemetry.io/otel/CHANGELOG.md index fb107426e76..4b361d0269c 100644 --- a/vendor/go.opentelemetry.io/otel/CHANGELOG.md +++ b/vendor/go.opentelemetry.io/otel/CHANGELOG.md @@ -11,6 +11,35 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm +## [1.31.0/0.53.0/0.7.0/0.0.10] 2024-10-11 + +### Added + +- Add `go.opentelemetry.io/otel/sdk/metric/exemplar` package which includes `Exemplar`, `Filter`, `TraceBasedFilter`, `AlwaysOnFilter`, `HistogramReservoir`, `FixedSizeReservoir`, `Reservoir`, `Value` and `ValueType` types. These will be used for configuring the exemplar reservoir for the metrics sdk. (#5747, #5862) +- Add `WithExportBufferSize` option to log batch processor.(#5877) + +### Changed + +- Enable exemplars by default in `go.opentelemetry.io/otel/sdk/metric`. Exemplars can be disabled by setting `OTEL_METRICS_EXEMPLAR_FILTER=always_off` (#5778) +- `Logger.Enabled` in `go.opentelemetry.io/otel/log` now accepts a newly introduced `EnabledParameters` type instead of `Record`. (#5791) +- `FilterProcessor.Enabled` in `go.opentelemetry.io/otel/sdk/log/internal/x` now accepts `EnabledParameters` instead of `Record`. (#5791) +- The `Record` type in `go.opentelemetry.io/otel/log` is no longer comparable. 
(#5847) +- Performance improvements for the trace SDK `SetAttributes` method in `Span`. (#5864) +- Reduce memory allocations for the `Event` and `Link` lists in `Span`. (#5858) +- Performance improvements for the trace SDK `AddEvent`, `AddLink`, `RecordError` and `End` methods in `Span`. (#5874) + +### Deprecated + +- Deprecate all examples under `go.opentelemetry.io/otel/example` as they are moved to [Contrib repository](https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/examples). (#5854) + +### Fixed + +- The race condition for multiple `FixedSize` exemplar reservoirs identified in #5814 is resolved. (#5819) +- Fix log records duplication in case of heterogeneous resource attributes by correctly mapping each log record to it's resource and scope. (#5803) +- Fix timer channel drain to avoid hanging on Go 1.23. (#5868) +- Fix delegation for global meter providers, and panic when calling otel.SetMeterProvider. (#5827) +- Change the `reflect.TypeOf` to use a nil pointer to not allocate on the heap unless necessary. (#5827) + ## [1.30.0/0.52.0/0.6.0/0.0.9] 2024-09-09 ### Added @@ -3081,7 +3110,8 @@ It contains api and sdk for trace and meter. - CircleCI build CI manifest files. - CODEOWNERS file to track owners of this project. 
-[Unreleased]: https://github.com/open-telemetry/opentelemetry-go/compare/v1.30.0...HEAD +[Unreleased]: https://github.com/open-telemetry/opentelemetry-go/compare/v1.31.0...HEAD +[1.31.0/0.53.0/0.7.0/0.0.10]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.31.0 [1.30.0/0.52.0/0.6.0/0.0.9]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.30.0 [1.29.0/0.51.0/0.5.0]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.29.0 [1.28.0/0.50.0/0.4.0]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.28.0 diff --git a/vendor/go.opentelemetry.io/otel/CODEOWNERS b/vendor/go.opentelemetry.io/otel/CODEOWNERS index 5904bb7070e..945a07d2b07 100644 --- a/vendor/go.opentelemetry.io/otel/CODEOWNERS +++ b/vendor/go.opentelemetry.io/otel/CODEOWNERS @@ -12,6 +12,6 @@ # https://help.github.com/en/articles/about-code-owners # -* @MrAlias @XSAM @dashpole @MadVikingGod @pellared @hanyuancheung @dmathieu +* @MrAlias @XSAM @dashpole @pellared @dmathieu -CODEOWNERS @MrAlias @MadVikingGod @pellared @dashpole @XSAM @dmathieu +CODEOWNERS @MrAlias @pellared @dashpole @XSAM @dmathieu diff --git a/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md b/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md index 91580725350..bb339655743 100644 --- a/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md +++ b/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md @@ -631,11 +631,8 @@ should be canceled. ### Approvers -- [Chester Cheung](https://github.com/hanyuancheung), Tencent - ### Maintainers -- [Aaron Clawson](https://github.com/MadVikingGod), LightStep - [Damien Mathieu](https://github.com/dmathieu), Elastic - [David Ashpole](https://github.com/dashpole), Google - [Robert Pająk](https://github.com/pellared), Splunk @@ -644,11 +641,13 @@ should be canceled. 
### Emeritus -- [Liz Fong-Jones](https://github.com/lizthegrey), Honeycomb -- [Gustavo Silva Paiva](https://github.com/paivagustavo), LightStep -- [Josh MacDonald](https://github.com/jmacd), LightStep +- [Aaron Clawson](https://github.com/MadVikingGod), LightStep - [Anthony Mirabella](https://github.com/Aneurysm9), AWS +- [Chester Cheung](https://github.com/hanyuancheung), Tencent - [Evan Torrie](https://github.com/evantorrie), Yahoo +- [Gustavo Silva Paiva](https://github.com/paivagustavo), LightStep +- [Josh MacDonald](https://github.com/jmacd), LightStep +- [Liz Fong-Jones](https://github.com/lizthegrey), Honeycomb ### Become an Approver or a Maintainer diff --git a/vendor/go.opentelemetry.io/otel/Makefile b/vendor/go.opentelemetry.io/otel/Makefile index b04695b242f..a1228a21240 100644 --- a/vendor/go.opentelemetry.io/otel/Makefile +++ b/vendor/go.opentelemetry.io/otel/Makefile @@ -54,9 +54,6 @@ $(TOOLS)/stringer: PACKAGE=golang.org/x/tools/cmd/stringer PORTO = $(TOOLS)/porto $(TOOLS)/porto: PACKAGE=github.com/jcchavezs/porto/cmd/porto -GOJQ = $(TOOLS)/gojq -$(TOOLS)/gojq: PACKAGE=github.com/itchyny/gojq/cmd/gojq - GOTMPL = $(TOOLS)/gotmpl $(GOTMPL): PACKAGE=go.opentelemetry.io/build-tools/gotmpl @@ -67,7 +64,7 @@ GOVULNCHECK = $(TOOLS)/govulncheck $(TOOLS)/govulncheck: PACKAGE=golang.org/x/vuln/cmd/govulncheck .PHONY: tools -tools: $(CROSSLINK) $(GOLANGCI_LINT) $(MISSPELL) $(GOCOVMERGE) $(STRINGER) $(PORTO) $(GOJQ) $(SEMCONVGEN) $(MULTIMOD) $(SEMCONVKIT) $(GOTMPL) $(GORELEASE) +tools: $(CROSSLINK) $(GOLANGCI_LINT) $(MISSPELL) $(GOCOVMERGE) $(STRINGER) $(PORTO) $(SEMCONVGEN) $(MULTIMOD) $(SEMCONVKIT) $(GOTMPL) $(GORELEASE) # Virtualized python tools via docker diff --git a/vendor/go.opentelemetry.io/otel/README.md b/vendor/go.opentelemetry.io/otel/README.md index 9a65707038c..efec278905b 100644 --- a/vendor/go.opentelemetry.io/otel/README.md +++ b/vendor/go.opentelemetry.io/otel/README.md @@ -89,8 +89,8 @@ If you need to extend the telemetry an instrumentation 
library provides or want to build your own instrumentation for your application directly you will need to use the [Go otel](https://pkg.go.dev/go.opentelemetry.io/otel) -package. The included [examples](./example/) are a good way to see some -practical uses of this process. +package. The [examples](https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/examples) +are a good way to see some practical uses of this process. ### Export diff --git a/vendor/go.opentelemetry.io/otel/RELEASING.md b/vendor/go.opentelemetry.io/otel/RELEASING.md index 59992984d42..ffa9b61258a 100644 --- a/vendor/go.opentelemetry.io/otel/RELEASING.md +++ b/vendor/go.opentelemetry.io/otel/RELEASING.md @@ -111,17 +111,6 @@ It is critical you make sure the version you push upstream is correct. Finally create a Release for the new `` on GitHub. The release body should include all the release notes from the Changelog for this release. -## Verify Examples - -After releasing verify that examples build outside of the repository. - -``` -./verify_examples.sh -``` - -The script copies examples into a different directory removes any `replace` declarations in `go.mod` and builds them. -This ensures they build with the published release, not the local copy. 
- ## Post-Release ### Contrib Repository diff --git a/vendor/go.opentelemetry.io/otel/attribute/set.go b/vendor/go.opentelemetry.io/otel/attribute/set.go index bff9c7fdbb9..6cbefceadfe 100644 --- a/vendor/go.opentelemetry.io/otel/attribute/set.go +++ b/vendor/go.opentelemetry.io/otel/attribute/set.go @@ -347,45 +347,25 @@ func computeDistinct(kvs []KeyValue) Distinct { func computeDistinctFixed(kvs []KeyValue) interface{} { switch len(kvs) { case 1: - ptr := new([1]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [1]KeyValue(kvs) case 2: - ptr := new([2]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [2]KeyValue(kvs) case 3: - ptr := new([3]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [3]KeyValue(kvs) case 4: - ptr := new([4]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [4]KeyValue(kvs) case 5: - ptr := new([5]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [5]KeyValue(kvs) case 6: - ptr := new([6]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [6]KeyValue(kvs) case 7: - ptr := new([7]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [7]KeyValue(kvs) case 8: - ptr := new([8]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [8]KeyValue(kvs) case 9: - ptr := new([9]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [9]KeyValue(kvs) case 10: - ptr := new([10]KeyValue) - copy((*ptr)[:], kvs) - return *ptr + return [10]KeyValue(kvs) default: return nil } diff --git a/vendor/go.opentelemetry.io/otel/internal/global/meter.go b/vendor/go.opentelemetry.io/otel/internal/global/meter.go index f2fc3929b11..e3db438a09f 100644 --- a/vendor/go.opentelemetry.io/otel/internal/global/meter.go +++ b/vendor/go.opentelemetry.io/otel/internal/global/meter.go @@ -152,14 +152,17 @@ func (m *meter) Int64Counter(name string, options ...metric.Int64CounterOption) return m.delegate.Int64Counter(name, options...) } - i := &siCounter{name: name, opts: options} cfg := metric.NewInt64CounterConfig(options...) 
id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*siCounter)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Int64Counter), nil + } + i := &siCounter{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -172,14 +175,17 @@ func (m *meter) Int64UpDownCounter(name string, options ...metric.Int64UpDownCou return m.delegate.Int64UpDownCounter(name, options...) } - i := &siUpDownCounter{name: name, opts: options} cfg := metric.NewInt64UpDownCounterConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*siUpDownCounter)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Int64UpDownCounter), nil + } + i := &siUpDownCounter{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -192,14 +198,17 @@ func (m *meter) Int64Histogram(name string, options ...metric.Int64HistogramOpti return m.delegate.Int64Histogram(name, options...) } - i := &siHistogram{name: name, opts: options} cfg := metric.NewInt64HistogramConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*siHistogram)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Int64Histogram), nil + } + i := &siHistogram{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -212,14 +221,17 @@ func (m *meter) Int64Gauge(name string, options ...metric.Int64GaugeOption) (met return m.delegate.Int64Gauge(name, options...) } - i := &siGauge{name: name, opts: options} cfg := metric.NewInt64GaugeConfig(options...) 
id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*siGauge)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Int64Gauge), nil + } + i := &siGauge{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -232,14 +244,17 @@ func (m *meter) Int64ObservableCounter(name string, options ...metric.Int64Obser return m.delegate.Int64ObservableCounter(name, options...) } - i := &aiCounter{name: name, opts: options} cfg := metric.NewInt64ObservableCounterConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*aiCounter)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Int64ObservableCounter), nil + } + i := &aiCounter{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -252,14 +267,17 @@ func (m *meter) Int64ObservableUpDownCounter(name string, options ...metric.Int6 return m.delegate.Int64ObservableUpDownCounter(name, options...) } - i := &aiUpDownCounter{name: name, opts: options} cfg := metric.NewInt64ObservableUpDownCounterConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*aiUpDownCounter)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Int64ObservableUpDownCounter), nil + } + i := &aiUpDownCounter{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -272,14 +290,17 @@ func (m *meter) Int64ObservableGauge(name string, options ...metric.Int64Observa return m.delegate.Int64ObservableGauge(name, options...) } - i := &aiGauge{name: name, opts: options} cfg := metric.NewInt64ObservableGaugeConfig(options...) 
id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*aiGauge)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Int64ObservableGauge), nil + } + i := &aiGauge{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -292,14 +313,17 @@ func (m *meter) Float64Counter(name string, options ...metric.Float64CounterOpti return m.delegate.Float64Counter(name, options...) } - i := &sfCounter{name: name, opts: options} cfg := metric.NewFloat64CounterConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*sfCounter)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Float64Counter), nil + } + i := &sfCounter{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -312,14 +336,17 @@ func (m *meter) Float64UpDownCounter(name string, options ...metric.Float64UpDow return m.delegate.Float64UpDownCounter(name, options...) } - i := &sfUpDownCounter{name: name, opts: options} cfg := metric.NewFloat64UpDownCounterConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*sfUpDownCounter)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Float64UpDownCounter), nil + } + i := &sfUpDownCounter{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -332,14 +359,17 @@ func (m *meter) Float64Histogram(name string, options ...metric.Float64Histogram return m.delegate.Float64Histogram(name, options...) } - i := &sfHistogram{name: name, opts: options} cfg := metric.NewFloat64HistogramConfig(options...) 
id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*sfHistogram)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Float64Histogram), nil + } + i := &sfHistogram{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -352,14 +382,17 @@ func (m *meter) Float64Gauge(name string, options ...metric.Float64GaugeOption) return m.delegate.Float64Gauge(name, options...) } - i := &sfGauge{name: name, opts: options} cfg := metric.NewFloat64GaugeConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*sfGauge)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Float64Gauge), nil + } + i := &sfGauge{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -372,14 +405,17 @@ func (m *meter) Float64ObservableCounter(name string, options ...metric.Float64O return m.delegate.Float64ObservableCounter(name, options...) } - i := &afCounter{name: name, opts: options} cfg := metric.NewFloat64ObservableCounterConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*afCounter)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Float64ObservableCounter), nil + } + i := &afCounter{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -392,14 +428,17 @@ func (m *meter) Float64ObservableUpDownCounter(name string, options ...metric.Fl return m.delegate.Float64ObservableUpDownCounter(name, options...) } - i := &afUpDownCounter{name: name, opts: options} cfg := metric.NewFloat64ObservableUpDownCounterConfig(options...) 
id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*afUpDownCounter)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Float64ObservableUpDownCounter), nil + } + i := &afUpDownCounter{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -412,14 +451,17 @@ func (m *meter) Float64ObservableGauge(name string, options ...metric.Float64Obs return m.delegate.Float64ObservableGauge(name, options...) } - i := &afGauge{name: name, opts: options} cfg := metric.NewFloat64ObservableGaugeConfig(options...) id := instID{ name: name, - kind: reflect.TypeOf(i), + kind: reflect.TypeOf((*afGauge)(nil)), description: cfg.Description(), unit: cfg.Unit(), } + if f, ok := m.instruments[id]; ok { + return f.(metric.Float64ObservableGauge), nil + } + i := &afGauge{name: name, opts: options} m.instruments[id] = i return i, nil } @@ -487,6 +529,7 @@ func (c *registration) setDelegate(m metric.Meter) { reg, err := m.RegisterCallback(c.function, insts...) if err != nil { GetErrorHandler().Handle(err) + return } c.unreg = reg.Unregister diff --git a/vendor/go.opentelemetry.io/otel/internal/rawhelpers.go b/vendor/go.opentelemetry.io/otel/internal/rawhelpers.go index 9b1da2c02b9..b2fe3e41d3b 100644 --- a/vendor/go.opentelemetry.io/otel/internal/rawhelpers.go +++ b/vendor/go.opentelemetry.io/otel/internal/rawhelpers.go @@ -20,7 +20,8 @@ func RawToBool(r uint64) bool { } func Int64ToRaw(i int64) uint64 { - return uint64(i) + // Assumes original was a valid int64 (overflow not checked). 
+ return uint64(i) // nolint: gosec } func RawToInt64(r uint64) int64 { diff --git a/vendor/go.opentelemetry.io/otel/metric/instrument.go b/vendor/go.opentelemetry.io/otel/metric/instrument.go index ea52e402331..a535782e1d9 100644 --- a/vendor/go.opentelemetry.io/otel/metric/instrument.go +++ b/vendor/go.opentelemetry.io/otel/metric/instrument.go @@ -351,7 +351,7 @@ func WithAttributeSet(attributes attribute.Set) MeasurementOption { // // cp := make([]attribute.KeyValue, len(attributes)) // copy(cp, attributes) -// WithAttributes(attribute.NewSet(cp...)) +// WithAttributeSet(attribute.NewSet(cp...)) // // [attribute.NewSet] may modify the passed attributes so this will make a copy // of attributes before creating a set in order to ensure this function is diff --git a/vendor/go.opentelemetry.io/otel/renovate.json b/vendor/go.opentelemetry.io/otel/renovate.json index 4d36b98cf48..0a29a2f13d8 100644 --- a/vendor/go.opentelemetry.io/otel/renovate.json +++ b/vendor/go.opentelemetry.io/otel/renovate.json @@ -23,6 +23,10 @@ { "matchPackageNames": ["google.golang.org/genproto/googleapis/**"], "groupName": "googleapis" + }, + { + "matchPackageNames": ["golang.org/x/**"], + "groupName": "golang.org/x" } ] } diff --git a/vendor/go.opentelemetry.io/otel/sdk/instrumentation/library.go b/vendor/go.opentelemetry.io/otel/sdk/instrumentation/library.go index f4d1857c4f4..f2cdf3c6518 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/instrumentation/library.go +++ b/vendor/go.opentelemetry.io/otel/sdk/instrumentation/library.go @@ -4,5 +4,6 @@ package instrumentation // import "go.opentelemetry.io/otel/sdk/instrumentation" // Library represents the instrumentation library. -// Deprecated: please use Scope instead. +// +// Deprecated: use [Scope] instead. 
type Library = Scope diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go b/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go index 71386e2da4c..3677c83d7da 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go +++ b/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go @@ -10,17 +10,16 @@ import ( "golang.org/x/sys/windows/registry" ) -// implements hostIDReader +// implements hostIDReader. type hostIDReaderWindows struct{} -// read reads MachineGuid from the windows registry key: -// SOFTWARE\Microsoft\Cryptography +// read reads MachineGuid from the Windows registry key: +// SOFTWARE\Microsoft\Cryptography. func (*hostIDReaderWindows) read() (string, error) { k, err := registry.OpenKey( registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Cryptography`, registry.QUERY_VALUE|registry.WOW64_64KEY, ) - if err != nil { return "", err } diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go b/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go index 5e3d199d785..a6a5a53c0ea 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go +++ b/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go @@ -17,7 +17,6 @@ import ( func platformOSDescription() (string, error) { k, err := registry.OpenKey( registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE) - if err != nil { return "", err } diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go b/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go index 1d399a75db2..4ce757dfd6b 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go @@ -316,7 +316,11 @@ func (bsp *batchSpanProcessor) processQueue() { bsp.batchMutex.Unlock() if shouldExport { if !bsp.timer.Stop() { - <-bsp.timer.C + // Handle both GODEBUG=asynctimerchan=[0|1] properly. 
+ select { + case <-bsp.timer.C: + default: + } } if err := bsp.exportSpans(ctx); err != nil { otel.Handle(err) diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go b/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go index 821c83faa1d..8c308dd60a9 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go @@ -12,25 +12,26 @@ import ( // evictedQueue is a FIFO queue with a configurable capacity. type evictedQueue[T any] struct { - queue []T - capacity int - droppedCount int - logDropped func() + queue []T + capacity int + droppedCount int + logDroppedMsg string + logDroppedOnce sync.Once } func newEvictedQueueEvent(capacity int) evictedQueue[Event] { // Do not pre-allocate queue, do this lazily. return evictedQueue[Event]{ - capacity: capacity, - logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Event") }), + capacity: capacity, + logDroppedMsg: "limit reached: dropping trace trace.Event", } } func newEvictedQueueLink(capacity int) evictedQueue[Link] { // Do not pre-allocate queue, do this lazily. return evictedQueue[Link]{ - capacity: capacity, - logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Link") }), + capacity: capacity, + logDroppedMsg: "limit reached: dropping trace trace.Link", } } @@ -53,6 +54,10 @@ func (eq *evictedQueue[T]) add(value T) { eq.queue = append(eq.queue, value) } +func (eq *evictedQueue[T]) logDropped() { + eq.logDroppedOnce.Do(func() { global.Warn(eq.logDroppedMsg) }) +} + // copy returns a copy of the evictedQueue. 
func (eq *evictedQueue[T]) copy() []T { return slices.Clone(eq.queue) diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/snapshot.go b/vendor/go.opentelemetry.io/otel/sdk/trace/snapshot.go index 32f862790c7..d511d0f271f 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/snapshot.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/snapshot.go @@ -99,7 +99,7 @@ func (s snapshot) InstrumentationScope() instrumentation.Scope { // InstrumentationLibrary returns information about the instrumentation // library that created the span. -func (s snapshot) InstrumentationLibrary() instrumentation.Library { +func (s snapshot) InstrumentationLibrary() instrumentation.Library { //nolint:staticcheck // This method needs to be define for backwards compatibility return s.instrumentationScope } diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/span.go b/vendor/go.opentelemetry.io/otel/sdk/trace/span.go index ac90f1a2600..730fb85c3ef 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/span.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/span.go @@ -62,7 +62,7 @@ type ReadOnlySpan interface { // InstrumentationLibrary returns information about the instrumentation // library that created the span. // Deprecated: please use InstrumentationScope instead. - InstrumentationLibrary() instrumentation.Library + InstrumentationLibrary() instrumentation.Library //nolint:staticcheck // This method needs to be define for backwards compatibility // Resource returns information about the entity that produced the span. Resource() *resource.Resource // DroppedAttributes returns the number of attributes dropped by the span @@ -174,6 +174,17 @@ func (s *recordingSpan) IsRecording() bool { s.mu.Lock() defer s.mu.Unlock() + return s.isRecording() +} + +// isRecording returns if this span is being recorded. If this span has ended +// this will return false. +// +// This method assumes s.mu.Lock is held by the caller. 
+func (s *recordingSpan) isRecording() bool { + if s == nil { + return false + } return s.endTime.IsZero() } @@ -182,11 +193,15 @@ func (s *recordingSpan) IsRecording() bool { // included in the set status when the code is for an error. If this span is // not being recorded than this method does nothing. func (s *recordingSpan) SetStatus(code codes.Code, description string) { - if !s.IsRecording() { + if s == nil { return } + s.mu.Lock() defer s.mu.Unlock() + if !s.isRecording() { + return + } if s.status.Code > code { return } @@ -210,12 +225,15 @@ func (s *recordingSpan) SetStatus(code codes.Code, description string) { // attributes the span is configured to have, the last added attributes will // be dropped. func (s *recordingSpan) SetAttributes(attributes ...attribute.KeyValue) { - if !s.IsRecording() { + if s == nil || len(attributes) == 0 { return } s.mu.Lock() defer s.mu.Unlock() + if !s.isRecording() { + return + } limit := s.tracer.provider.spanLimits.AttributeCountLimit if limit == 0 { @@ -233,7 +251,7 @@ func (s *recordingSpan) SetAttributes(attributes ...attribute.KeyValue) { // Otherwise, add without deduplication. When attributes are read they // will be deduplicated, optimizing the operation. - s.attributes = slices.Grow(s.attributes, len(s.attributes)+len(attributes)) + s.attributes = slices.Grow(s.attributes, len(attributes)) for _, a := range attributes { if !a.Valid() { // Drop all invalid attributes. @@ -280,13 +298,17 @@ func (s *recordingSpan) addOverCapAttrs(limit int, attrs []attribute.KeyValue) { // Do not set a capacity when creating this map. Benchmark testing has // showed this to only add unused memory allocations in general use. - exists := make(map[attribute.Key]int) - s.dedupeAttrsFromRecord(&exists) + exists := make(map[attribute.Key]int, len(s.attributes)) + s.dedupeAttrsFromRecord(exists) // Now that s.attributes is deduplicated, adding unique attributes up to // the capacity of s will not over allocate s.attributes. 
- sum := len(attrs) + len(s.attributes) - s.attributes = slices.Grow(s.attributes, min(sum, limit)) + + // max size = limit + maxCap := min(len(attrs)+len(s.attributes), limit) + if cap(s.attributes) < maxCap { + s.attributes = slices.Grow(s.attributes, maxCap-cap(s.attributes)) + } for _, a := range attrs { if !a.Valid() { // Drop all invalid attributes. @@ -296,6 +318,7 @@ func (s *recordingSpan) addOverCapAttrs(limit int, attrs []attribute.KeyValue) { if idx, ok := exists[a.Key]; ok { // Perform all updates before dropping, even when at capacity. + a = truncateAttr(s.tracer.provider.spanLimits.AttributeValueLengthLimit, a) s.attributes[idx] = a continue } @@ -386,9 +409,10 @@ func (s *recordingSpan) End(options ...trace.SpanEndOption) { // the span's duration in case some operation below takes a while. et := monotonicEndTime(s.startTime) - // Do relative expensive check now that we have an end time and see if we - // need to do any more processing. - if !s.IsRecording() { + // Lock the span now that we have an end time and see if we need to do any more processing. + s.mu.Lock() + if !s.isRecording() { + s.mu.Unlock() return } @@ -413,10 +437,11 @@ func (s *recordingSpan) End(options ...trace.SpanEndOption) { } if s.executionTracerTaskEnd != nil { + s.mu.Unlock() s.executionTracerTaskEnd() + s.mu.Lock() } - s.mu.Lock() // Setting endTime to non-zero marks the span as ended and not recording. if config.Timestamp().IsZero() { s.endTime = et @@ -450,7 +475,13 @@ func monotonicEndTime(start time.Time) time.Time { // does not change the Span status. If this span is not being recorded or err is nil // than this method does nothing. 
func (s *recordingSpan) RecordError(err error, opts ...trace.EventOption) { - if s == nil || err == nil || !s.IsRecording() { + if s == nil || err == nil { + return + } + + s.mu.Lock() + defer s.mu.Unlock() + if !s.isRecording() { return } @@ -486,14 +517,23 @@ func recordStackTrace() string { } // AddEvent adds an event with the provided name and options. If this span is -// not being recorded than this method does nothing. +// not being recorded then this method does nothing. func (s *recordingSpan) AddEvent(name string, o ...trace.EventOption) { - if !s.IsRecording() { + if s == nil { + return + } + + s.mu.Lock() + defer s.mu.Unlock() + if !s.isRecording() { return } s.addEvent(name, o...) } +// addEvent adds an event with the provided name and options. +// +// This method assumes s.mu.Lock is held by the caller. func (s *recordingSpan) addEvent(name string, o ...trace.EventOption) { c := trace.NewEventConfig(o...) e := Event{Name: name, Attributes: c.Attributes(), Time: c.Timestamp()} @@ -510,20 +550,21 @@ func (s *recordingSpan) addEvent(name string, o ...trace.EventOption) { e.Attributes = e.Attributes[:limit] } - s.mu.Lock() s.events.add(e) - s.mu.Unlock() } // SetName sets the name of this span. If this span is not being recorded than // this method does nothing. func (s *recordingSpan) SetName(name string) { - if !s.IsRecording() { + if s == nil { return } s.mu.Lock() defer s.mu.Unlock() + if !s.isRecording() { + return + } s.name = name } @@ -579,23 +620,23 @@ func (s *recordingSpan) Attributes() []attribute.KeyValue { func (s *recordingSpan) dedupeAttrs() { // Do not set a capacity when creating this map. Benchmark testing has // showed this to only add unused memory allocations in general use. 
- exists := make(map[attribute.Key]int) - s.dedupeAttrsFromRecord(&exists) + exists := make(map[attribute.Key]int, len(s.attributes)) + s.dedupeAttrsFromRecord(exists) } // dedupeAttrsFromRecord deduplicates the attributes of s to fit capacity // using record as the record of unique attribute keys to their index. // // This method assumes s.mu.Lock is held by the caller. -func (s *recordingSpan) dedupeAttrsFromRecord(record *map[attribute.Key]int) { +func (s *recordingSpan) dedupeAttrsFromRecord(record map[attribute.Key]int) { // Use the fact that slices share the same backing array. unique := s.attributes[:0] for _, a := range s.attributes { - if idx, ok := (*record)[a.Key]; ok { + if idx, ok := record[a.Key]; ok { unique[idx] = a } else { unique = append(unique, a) - (*record)[a.Key] = len(unique) - 1 + record[a.Key] = len(unique) - 1 } } // s.attributes have element types of attribute.KeyValue. These types are @@ -642,7 +683,7 @@ func (s *recordingSpan) InstrumentationScope() instrumentation.Scope { // InstrumentationLibrary returns the instrumentation.Library associated with // the Tracer that created this span. -func (s *recordingSpan) InstrumentationLibrary() instrumentation.Library { +func (s *recordingSpan) InstrumentationLibrary() instrumentation.Library { //nolint:staticcheck // This method needs to be define for backwards compatibility s.mu.Lock() defer s.mu.Unlock() return s.tracer.instrumentationScope @@ -657,7 +698,7 @@ func (s *recordingSpan) Resource() *resource.Resource { } func (s *recordingSpan) AddLink(link trace.Link) { - if !s.IsRecording() { + if s == nil { return } if !link.SpanContext.IsValid() && len(link.Attributes) == 0 && @@ -665,6 +706,12 @@ func (s *recordingSpan) AddLink(link trace.Link) { return } + s.mu.Lock() + defer s.mu.Unlock() + if !s.isRecording() { + return + } + l := Link{SpanContext: link.SpanContext, Attributes: link.Attributes} // Discard attributes over limit. 
@@ -678,9 +725,7 @@ func (s *recordingSpan) AddLink(link trace.Link) { l.Attributes = l.Attributes[:limit] } - s.mu.Lock() s.links.add(l) - s.mu.Unlock() } // DroppedAttributes returns the number of attributes dropped by the span @@ -755,12 +800,16 @@ func (s *recordingSpan) snapshot() ReadOnlySpan { } func (s *recordingSpan) addChild() { - if !s.IsRecording() { + if s == nil { return } + s.mu.Lock() + defer s.mu.Unlock() + if !s.isRecording() { + return + } s.childSpanCount++ - s.mu.Unlock() } func (*recordingSpan) private() {} diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/tracetest/span.go b/vendor/go.opentelemetry.io/otel/sdk/trace/tracetest/span.go index 0a641f94889..cd2cc30ca2d 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/tracetest/span.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/tracetest/span.go @@ -45,22 +45,25 @@ func (s SpanStubs) Snapshots() []tracesdk.ReadOnlySpan { // SpanStub is a stand-in for a Span. type SpanStub struct { - Name string - SpanContext trace.SpanContext - Parent trace.SpanContext - SpanKind trace.SpanKind - StartTime time.Time - EndTime time.Time - Attributes []attribute.KeyValue - Events []tracesdk.Event - Links []tracesdk.Link - Status tracesdk.Status - DroppedAttributes int - DroppedEvents int - DroppedLinks int - ChildSpanCount int - Resource *resource.Resource - InstrumentationLibrary instrumentation.Library + Name string + SpanContext trace.SpanContext + Parent trace.SpanContext + SpanKind trace.SpanKind + StartTime time.Time + EndTime time.Time + Attributes []attribute.KeyValue + Events []tracesdk.Event + Links []tracesdk.Link + Status tracesdk.Status + DroppedAttributes int + DroppedEvents int + DroppedLinks int + ChildSpanCount int + Resource *resource.Resource + InstrumentationScope instrumentation.Scope + + // Deprecated: use InstrumentationScope instead. 
+ InstrumentationLibrary instrumentation.Library //nolint:staticcheck // This method needs to be define for backwards compatibility } // SpanStubFromReadOnlySpan returns a SpanStub populated from ro. @@ -85,12 +88,18 @@ func SpanStubFromReadOnlySpan(ro tracesdk.ReadOnlySpan) SpanStub { DroppedLinks: ro.DroppedLinks(), ChildSpanCount: ro.ChildSpanCount(), Resource: ro.Resource(), + InstrumentationScope: ro.InstrumentationScope(), InstrumentationLibrary: ro.InstrumentationScope(), } } // Snapshot returns a read-only copy of the SpanStub. func (s SpanStub) Snapshot() tracesdk.ReadOnlySpan { + scopeOrLibrary := s.InstrumentationScope + if scopeOrLibrary.Name == "" && scopeOrLibrary.Version == "" && scopeOrLibrary.SchemaURL == "" { + scopeOrLibrary = s.InstrumentationLibrary + } + return spanSnapshot{ name: s.Name, spanContext: s.SpanContext, @@ -107,7 +116,7 @@ func (s SpanStub) Snapshot() tracesdk.ReadOnlySpan { droppedLinks: s.DroppedLinks, childSpanCount: s.ChildSpanCount, resource: s.Resource, - instrumentationScope: s.InstrumentationLibrary, + instrumentationScope: scopeOrLibrary, } } @@ -152,6 +161,6 @@ func (s spanSnapshot) InstrumentationScope() instrumentation.Scope { return s.instrumentationScope } -func (s spanSnapshot) InstrumentationLibrary() instrumentation.Library { +func (s spanSnapshot) InstrumentationLibrary() instrumentation.Library { //nolint:staticcheck // This method needs to be define for backwards compatibility return s.instrumentationScope } diff --git a/vendor/go.opentelemetry.io/otel/sdk/version.go b/vendor/go.opentelemetry.io/otel/sdk/version.go index 33d065a7cb9..dc1eaa8e9d0 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/version.go +++ b/vendor/go.opentelemetry.io/otel/sdk/version.go @@ -5,5 +5,5 @@ package sdk // import "go.opentelemetry.io/otel/sdk" // Version is the current release version of the OpenTelemetry SDK in use. 
func Version() string { - return "1.28.0" + return "1.31.0" } diff --git a/vendor/go.opentelemetry.io/otel/verify_examples.sh b/vendor/go.opentelemetry.io/otel/verify_examples.sh deleted file mode 100644 index e57bf57fce8..00000000000 --- a/vendor/go.opentelemetry.io/otel/verify_examples.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -# Copyright The OpenTelemetry Authors -# SPDX-License-Identifier: Apache-2.0 - -set -euo pipefail - -cd $(dirname $0) -TOOLS_DIR=$(pwd)/.tools - -if [ -z "${GOPATH}" ] ; then - printf "GOPATH is not defined.\n" - exit -1 -fi - -if [ ! -d "${GOPATH}" ] ; then - printf "GOPATH ${GOPATH} is invalid \n" - exit -1 -fi - -# Pre-requisites -if ! git diff --quiet; then \ - git status - printf "\n\nError: working tree is not clean\n" - exit -1 -fi - -if [ "$(git tag --contains $(git log -1 --pretty=format:"%H"))" = "" ] ; then - printf "$(git log -1)" - printf "\n\nError: HEAD is not pointing to a tagged version" -fi - -make ${TOOLS_DIR}/gojq - -DIR_TMP="${GOPATH}/src/oteltmp/" -rm -rf $DIR_TMP -mkdir -p $DIR_TMP - -printf "Copy examples to ${DIR_TMP}\n" -cp -a ./example ${DIR_TMP} - -# Update go.mod files -printf "Update go.mod: rename module and remove replace\n" - -PACKAGE_DIRS=$(find . -mindepth 2 -type f -name 'go.mod' -exec dirname {} \; | egrep 'example' | sed 's/^\.\///' | sort) - -for dir in $PACKAGE_DIRS; do - printf " Update go.mod for $dir\n" - (cd "${DIR_TMP}/${dir}" && \ - # replaces is ("mod1" "mod2" …) - replaces=($(go mod edit -json | ${TOOLS_DIR}/gojq '.Replace[].Old.Path')) && \ - # strip double quotes - replaces=("${replaces[@]%\"}") && \ - replaces=("${replaces[@]#\"}") && \ - # make an array (-dropreplace=mod1 -dropreplace=mod2 …) - dropreplaces=("${replaces[@]/#/-dropreplace=}") && \ - go mod edit -module "oteltmp/${dir}" "${dropreplaces[@]}" && \ - go mod tidy) -done -printf "Update done:\n\n" - -# Build directories that contain main package. 
These directories are different than -# directories that contain go.mod files. -printf "Build examples:\n" -EXAMPLES=$(./get_main_pkgs.sh ./example) -for ex in $EXAMPLES; do - printf " Build $ex in ${DIR_TMP}/${ex}\n" - (cd "${DIR_TMP}/${ex}" && \ - go build .) -done - -# Cleanup -printf "Remove copied files.\n" -rm -rf $DIR_TMP diff --git a/vendor/go.opentelemetry.io/otel/version.go b/vendor/go.opentelemetry.io/otel/version.go index 78b40f3ed24..6d3c7b1f40e 100644 --- a/vendor/go.opentelemetry.io/otel/version.go +++ b/vendor/go.opentelemetry.io/otel/version.go @@ -5,5 +5,5 @@ package otel // import "go.opentelemetry.io/otel" // Version is the current release version of OpenTelemetry in use. func Version() string { - return "1.30.0" + return "1.31.0" } diff --git a/vendor/go.opentelemetry.io/otel/versions.yaml b/vendor/go.opentelemetry.io/otel/versions.yaml index 0c32f4fc46e..cdebdb5eb78 100644 --- a/vendor/go.opentelemetry.io/otel/versions.yaml +++ b/vendor/go.opentelemetry.io/otel/versions.yaml @@ -3,7 +3,7 @@ module-sets: stable-v1: - version: v1.30.0 + version: v1.31.0 modules: - go.opentelemetry.io/otel - go.opentelemetry.io/otel/bridge/opencensus @@ -29,12 +29,12 @@ module-sets: - go.opentelemetry.io/otel/sdk/metric - go.opentelemetry.io/otel/trace experimental-metrics: - version: v0.52.0 + version: v0.53.0 modules: - go.opentelemetry.io/otel/example/prometheus - go.opentelemetry.io/otel/exporters/prometheus experimental-logs: - version: v0.6.0 + version: v0.7.0 modules: - go.opentelemetry.io/otel/log - go.opentelemetry.io/otel/sdk/log @@ -42,7 +42,7 @@ module-sets: - go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp - go.opentelemetry.io/otel/exporters/stdout/stdoutlog experimental-schema: - version: v0.0.9 + version: v0.0.10 modules: - go.opentelemetry.io/otel/schema excluded-modules: diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE index 6a66aea5eaf..2a7cf70da6e 100644 --- a/vendor/golang.org/x/sys/LICENSE +++ 
b/vendor/golang.org/x/sys/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go index 8fa707aa4ba..02609d5b21d 100644 --- a/vendor/golang.org/x/sys/cpu/cpu.go +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -105,6 +105,8 @@ var ARM64 struct { HasSVE bool // Scalable Vector Extensions HasSVE2 bool // Scalable Vector Extensions 2 HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32 + HasDIT bool // Data Independent Timing support + HasI8MM bool // Advanced SIMD Int8 matrix multiplication instructions _ CacheLinePad } @@ -199,6 +201,25 @@ var S390X struct { _ CacheLinePad } +// RISCV64 contains the supported CPU features and performance characteristics for riscv64 +// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate +// the presence of RISC-V extensions. +// +// It is safe to assume that all the RV64G extensions are supported and so they are omitted from +// this structure. As riscv64 Go programs require at least RV64G, the code that populates +// this structure cannot run successfully if some of the RV64G extensions are missing. +// The struct is padded to avoid false sharing. 
+var RISCV64 struct { + _ CacheLinePad + HasFastMisaligned bool // Fast misaligned accesses + HasC bool // Compressed instruction-set extension + HasV bool // Vector extension compatible with RVV 1.0 + HasZba bool // Address generation instructions extension + HasZbb bool // Basic bit-manipulation extension + HasZbs bool // Single-bit instructions extension + _ CacheLinePad +} + func init() { archInit() initOptions() diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go index 0e27a21e1f8..af2aa99f9f0 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go @@ -38,6 +38,8 @@ func initOptions() { {Name: "dcpop", Feature: &ARM64.HasDCPOP}, {Name: "asimddp", Feature: &ARM64.HasASIMDDP}, {Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM}, + {Name: "dit", Feature: &ARM64.HasDIT}, + {Name: "i8mm", Feature: &ARM64.HasI8MM}, } } @@ -145,6 +147,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { ARM64.HasLRCPC = true } + switch extractBits(isar1, 52, 55) { + case 1: + ARM64.HasI8MM = true + } + // ID_AA64PFR0_EL1 switch extractBits(pfr0, 16, 19) { case 0: @@ -168,6 +175,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { parseARM64SVERegister(getzfr0()) } + + switch extractBits(pfr0, 48, 51) { + case 1: + ARM64.HasDIT = true + } } func parseARM64SVERegister(zfr0 uint64) { diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go index 3d386d0fc21..08f35ea1773 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go @@ -35,8 +35,10 @@ const ( hwcap_SHA512 = 1 << 21 hwcap_SVE = 1 << 22 hwcap_ASIMDFHM = 1 << 23 + hwcap_DIT = 1 << 24 hwcap2_SVE2 = 1 << 1 + hwcap2_I8MM = 1 << 13 ) // linuxKernelCanEmulateCPUID reports whether we're running @@ -106,9 +108,12 @@ func doinit() { ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) ARM64.HasSVE = isSet(hwCap, hwcap_SVE) 
ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) + ARM64.HasDIT = isSet(hwCap, hwcap_DIT) + // HWCAP2 feature bits ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2) + ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM) } func isSet(hwc uint, value uint) bool { diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go index cd63e733557..7d902b6847b 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x +//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64 package cpu diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go new file mode 100644 index 00000000000..cb4a0c57280 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go @@ -0,0 +1,137 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "syscall" + "unsafe" +) + +// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe +// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available. +// +// A note on detection of the Vector extension using HWCAP. +// +// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5. +// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe +// syscall is not available then neither is the Vector extension (which needs kernel support). +// The riscv_hwprobe syscall should then be all we need to detect the Vector extension. 
+// However, some RISC-V board manufacturers ship boards with an older kernel on top of which +// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe +// patches. These kernels advertise support for the Vector extension using HWCAP. Falling +// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not +// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option. +// +// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by +// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board +// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified +// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use +// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector +// extension are binary incompatible. HWCAP can then not be used in isolation to populate the +// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0. +// +// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector +// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype +// register. This check would allow us to safely detect version 1.0 of the Vector extension +// with HWCAP, if riscv_hwprobe were not available. However, the check cannot +// be added until the assembler supports the Vector instructions. +// +// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the +// extensions it advertises support for are explicitly versioned. It's also worth noting that +// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba. 
+// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority +// of RISC-V extensions. +// +// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information. + +// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must +// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall +// here. + +const ( + // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. + riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 + riscv_HWPROBE_IMA_C = 0x2 + riscv_HWPROBE_IMA_V = 0x4 + riscv_HWPROBE_EXT_ZBA = 0x8 + riscv_HWPROBE_EXT_ZBB = 0x10 + riscv_HWPROBE_EXT_ZBS = 0x20 + riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 + riscv_HWPROBE_MISALIGNED_FAST = 0x3 + riscv_HWPROBE_MISALIGNED_MASK = 0x7 +) + +const ( + // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go. + sys_RISCV_HWPROBE = 258 +) + +// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. +type riscvHWProbePairs struct { + key int64 + value uint64 +} + +const ( + // CPU features + hwcap_RISCV_ISA_C = 1 << ('C' - 'A') +) + +func doinit() { + // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key + // field should be initialised with one of the key constants defined above, e.g., + // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value. + // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0. + + pairs := []riscvHWProbePairs{ + {riscv_HWPROBE_KEY_IMA_EXT_0, 0}, + {riscv_HWPROBE_KEY_CPUPERF_0, 0}, + } + + // This call only indicates that extensions are supported if they are implemented on all cores. 
+ if riscvHWProbe(pairs, 0) { + if pairs[0].key != -1 { + v := uint(pairs[0].value) + RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C) + RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) + RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA) + RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB) + RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS) + } + if pairs[1].key != -1 { + v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK + RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST + } + } + + // Let's double check with HWCAP if the C extension does not appear to be supported. + // This may happen if we're running on a kernel older than 6.4. + + if !RISCV64.HasC { + RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C) + } +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} + +// riscvHWProbe is a simplified version of the generated wrapper function found in +// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the +// cpuCount and cpus parameters which we do not need. We always want to pass 0 for +// these parameters here so the kernel only reports the extensions that are present +// on all cores. 
+func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool { + var _zero uintptr + var p0 unsafe.Pointer + if len(pairs) > 0 { + p0 = unsafe.Pointer(&pairs[0]) + } else { + p0 = unsafe.Pointer(&_zero) + } + + _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0) + return e1 == 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go index 7f0c79c004b..aca3199c911 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go @@ -8,4 +8,13 @@ package cpu const cacheLineSize = 64 -func initOptions() {} +func initOptions() { + options = []option{ + {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, + {Name: "c", Feature: &RISCV64.HasC}, + {Name: "v", Feature: &RISCV64.HasV}, + {Name: "zba", Feature: &RISCV64.HasZba}, + {Name: "zbb", Feature: &RISCV64.HasZbb}, + {Name: "zbs", Feature: &RISCV64.HasZbs}, + } +} diff --git a/vendor/golang.org/x/sys/unix/README.md b/vendor/golang.org/x/sys/unix/README.md index 7d3c060e122..6e08a76a716 100644 --- a/vendor/golang.org/x/sys/unix/README.md +++ b/vendor/golang.org/x/sys/unix/README.md @@ -156,7 +156,7 @@ from the generated architecture-specific files listed below, and merge these into a common file for each OS. The merge is performed in the following steps: -1. Construct the set of common code that is idential in all architecture-specific files. +1. Construct the set of common code that is identical in all architecture-specific files. 2. Write this common code to the merged file. 3. Remove the common code from all architecture-specific files. 
diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index 4ed2e488b61..ac54ecaba0a 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -58,6 +58,7 @@ includes_Darwin=' #define _DARWIN_USE_64_BIT_INODE #define __APPLE_USE_RFC_3542 #include +#include #include #include #include @@ -551,6 +552,7 @@ ccflags="$@" $2 !~ /^RTC_VL_(ACCURACY|BACKUP|DATA)/ && $2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTC|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P|NETNSA)_/ || $2 ~ /^SOCK_|SK_DIAG_|SKNLGRP_$/ || + $2 ~ /^(CONNECT|SAE)_/ || $2 ~ /^FIORDCHK$/ || $2 ~ /^SIOC/ || $2 ~ /^TIOC/ || @@ -654,7 +656,7 @@ errors=$( signals=$( echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print $2 }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort ) @@ -664,7 +666,7 @@ echo '#include ' | $CC -x c - -E -dM $ccflags | sort >_error.grep echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print "^\t" $2 "[ \t]*=" }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort >_signal.grep echo '// mkerrors.sh' "$@" diff --git a/vendor/golang.org/x/sys/unix/syscall_aix.go b/vendor/golang.org/x/sys/unix/syscall_aix.go index 67ce6cef2d5..6f15ba1eaff 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix.go @@ -360,7 +360,7 @@ func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, var status _C_int var r Pid_t err = ERESTART - // AIX wait4 may return with ERESTART errno, while the processus is still + // AIX wait4 may return with ERESTART errno, while the process is still // active. 
for err == ERESTART { r, err = wait4(Pid_t(pid), &status, options, rusage) diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go index 4cc7b005967..099867deede 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go @@ -402,6 +402,18 @@ func IoctlSetIfreqMTU(fd int, ifreq *IfreqMTU) error { return ioctlPtr(fd, SIOCSIFMTU, unsafe.Pointer(ifreq)) } +//sys renamexNp(from string, to string, flag uint32) (err error) + +func RenamexNp(from string, to string, flag uint32) (err error) { + return renamexNp(from, to, flag) +} + +//sys renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) + +func RenameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + return renameatxNp(fromfd, from, tofd, to, flag) +} + //sys sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) = SYS_SYSCTL func Uname(uname *Utsname) error { @@ -554,6 +566,43 @@ func PthreadFchdir(fd int) (err error) { return pthread_fchdir_np(fd) } +// Connectx calls connectx(2) to initiate a connection on a socket. +// +// srcIf, srcAddr, and dstAddr are filled into a [SaEndpoints] struct and passed as the endpoints argument. +// +// - srcIf is the optional source interface index. 0 means unspecified. +// - srcAddr is the optional source address. nil means unspecified. +// - dstAddr is the destination address. +// +// On success, Connectx returns the number of bytes enqueued for transmission. 
+func Connectx(fd int, srcIf uint32, srcAddr, dstAddr Sockaddr, associd SaeAssocID, flags uint32, iov []Iovec, connid *SaeConnID) (n uintptr, err error) { + endpoints := SaEndpoints{ + Srcif: srcIf, + } + + if srcAddr != nil { + addrp, addrlen, err := srcAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Srcaddr = (*RawSockaddr)(addrp) + endpoints.Srcaddrlen = uint32(addrlen) + } + + if dstAddr != nil { + addrp, addrlen, err := dstAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Dstaddr = (*RawSockaddr)(addrp) + endpoints.Dstaddrlen = uint32(addrlen) + } + + err = connectx(fd, &endpoints, associd, flags, iov, &n, connid) + return +} + +//sys connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) //sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) //sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go index ba46651f8e3..a6a2d2fc2b9 100644 --- a/vendor/golang.org/x/sys/unix/syscall_hurd.go +++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go @@ -11,6 +11,7 @@ package unix int ioctl(int, unsigned long int, uintptr_t); */ import "C" +import "unsafe" func ioctl(fd int, req uint, arg uintptr) (err error) { r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(arg)) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index 5682e2628ad..f08abd434ff 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -1295,6 +1295,48 @@ func GetsockoptTCPInfo(fd, level, opt int) (*TCPInfo, error) { return &value, err } +// GetsockoptTCPCCVegasInfo returns algorithm specific congestion control information for a socket using the "vegas" +// algorithm. 
+// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCVegasInfo(fd, level, opt int) (*TCPVegasInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPVegasInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCDCTCPInfo returns algorithm specific congestion control information for a socket using the "dctp" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCDCTCPInfo(fd, level, opt int) (*TCPDCTCPInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPDCTCPInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCBBRInfo returns algorithm specific congestion control information for a socket using the "bbr" +// algorithm. 
+// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCBBRInfo(fd, level, opt int) (*TCPBBRInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPBBRInfo)(unsafe.Pointer(&value[0])) + return out, err +} + // GetsockoptString returns the string value of the socket option opt for the // socket associated with fd at the given socket level. func GetsockoptString(fd, level, opt int) (string, error) { @@ -1959,7 +2001,26 @@ func Getpgrp() (pid int) { //sysnb Getpid() (pid int) //sysnb Getppid() (ppid int) //sys Getpriority(which int, who int) (prio int, err error) -//sys Getrandom(buf []byte, flags int) (n int, err error) + +func Getrandom(buf []byte, flags int) (n int, err error) { + vdsoRet, supported := vgetrandom(buf, uint32(flags)) + if supported { + if vdsoRet < 0 { + return 0, errnoErr(syscall.Errno(-vdsoRet)) + } + return vdsoRet, nil + } + var p *byte + if len(buf) > 0 { + p = &buf[0] + } + r, _, e := Syscall(SYS_GETRANDOM, uintptr(unsafe.Pointer(p)), uintptr(len(buf)), uintptr(flags)) + if e != 0 { + return 0, errnoErr(e) + } + return int(r), nil +} + //sysnb Getrusage(who int, rusage *Rusage) (err error) //sysnb Getsid(pid int) (sid int, err error) //sysnb Gettid() (tid int) @@ -2592,3 +2653,4 @@ func SchedGetAttr(pid int, flags uint) (*SchedAttr, error) { } //sys Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) (err error) +//sys Mseal(b []byte, flags uint) (err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go index cf2ee6c75ef..745e5c7e6c0 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go +++ 
b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go @@ -182,3 +182,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go index 3d0e98451f8..dd2262a4079 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go @@ -214,3 +214,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go index 6f5a288944d..8cf3670bda6 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go @@ -187,3 +187,5 @@ func RISCVHWProbe(pairs []RISCVHWProbePairs, set *CPUSet, flags uint) (err error } return riscvHWProbe(pairs, setSize, set, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_openbsd.go b/vendor/golang.org/x/sys/unix/syscall_openbsd.go index b25343c71a4..b86ded549c6 100644 --- a/vendor/golang.org/x/sys/unix/syscall_openbsd.go +++ b/vendor/golang.org/x/sys/unix/syscall_openbsd.go @@ -293,6 +293,7 @@ func Uname(uname *Utsname) error { //sys Mkfifoat(dirfd int, path string, mode uint32) (err error) //sys Mknod(path string, mode uint32, dev int) (err error) //sys Mknodat(dirfd int, path string, mode uint32, dev int) (err error) +//sys Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) //sys Nanosleep(time *Timespec, leftover *Timespec) (err error) //sys Open(path string, mode int, perm uint32) (fd int, err error) //sys Openat(dirfd int, path string, mode int, 
perm uint32) (fd int, err error) diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_linux.go b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go new file mode 100644 index 00000000000..07ac8e09d1b --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go @@ -0,0 +1,13 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && go1.24 + +package unix + +import _ "unsafe" + +//go:linkname vgetrandom runtime.vgetrandom +//go:noescape +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go new file mode 100644 index 00000000000..297e97bce92 --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build !linux || !go1.24 + +package unix + +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) { + return -1, false +} diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go index e40fa85245f..d73c4652e6c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1169,6 +1172,11 @@ const ( PT_WRITE_D = 0x5 PT_WRITE_I = 0x4 PT_WRITE_U = 0x6 + RENAME_EXCL = 0x4 + RENAME_NOFOLLOW_ANY = 0x10 + RENAME_RESERVED1 = 0x8 + RENAME_SECLUDE = 0x1 + RENAME_SWAP = 0x2 RLIMIT_AS = 0x5 RLIMIT_CORE = 0x4 RLIMIT_CPU = 0x0 @@ -1260,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go index bb02aa6c056..4a55a400588 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1169,6 +1172,11 @@ const ( PT_WRITE_D = 0x5 PT_WRITE_I = 0x4 PT_WRITE_U = 0x6 + RENAME_EXCL = 0x4 + RENAME_NOFOLLOW_ANY = 0x10 + RENAME_RESERVED1 = 0x8 + RENAME_SECLUDE = 0x1 + RENAME_SWAP = 0x2 RLIMIT_AS = 0x5 RLIMIT_CORE = 0x4 RLIMIT_CPU = 0x0 @@ -1260,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 
RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go index 877a62b479a..de3b462489c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go @@ -457,6 +457,7 @@ const ( B600 = 0x8 B75 = 0x2 B9600 = 0xd + BCACHEFS_SUPER_MAGIC = 0xca451a4e BDEVFS_MAGIC = 0x62646576 BINDERFS_SUPER_MAGIC = 0x6c6f6f70 BINFMTFS_MAGIC = 0x42494e4d @@ -494,6 +495,7 @@ const ( BPF_F_TEST_REG_INVARIANTS = 0x80 BPF_F_TEST_RND_HI32 = 0x4 BPF_F_TEST_RUN_ON_CPU = 0x1 + BPF_F_TEST_SKB_CHECKSUM_COMPLETE = 0x4 BPF_F_TEST_STATE_FREQ = 0x8 BPF_F_TEST_XDP_LIVE_FRAMES = 0x2 BPF_F_XDP_DEV_BOUND_ONLY = 0x40 @@ -928,6 +930,7 @@ const ( EPOLL_CTL_ADD = 0x1 EPOLL_CTL_DEL = 0x2 EPOLL_CTL_MOD = 0x3 + EPOLL_IOC_TYPE = 0x8a EROFS_SUPER_MAGIC_V1 = 0xe0f5e1e2 ESP_V4_FLOW = 0xa ESP_V6_FLOW = 0xc @@ -941,9 +944,6 @@ const ( ETHTOOL_FEC_OFF = 0x4 ETHTOOL_FEC_RS = 0x8 ETHTOOL_FLAG_ALL = 0x7 - ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 - ETHTOOL_FLAG_OMIT_REPLY = 0x2 - ETHTOOL_FLAG_STATS = 0x4 ETHTOOL_FLASHDEV = 0x33 ETHTOOL_FLASH_MAX_FILENAME = 0x80 ETHTOOL_FWVERS_LEN = 0x20 @@ -1705,6 +1705,7 @@ const ( KEXEC_ARCH_S390 = 0x160000 KEXEC_ARCH_SH = 0x2a0000 KEXEC_ARCH_X86_64 = 0x3e0000 + KEXEC_CRASH_HOTPLUG_SUPPORT = 0x8 KEXEC_FILE_DEBUG = 0x8 KEXEC_FILE_NO_INITRAMFS = 0x4 KEXEC_FILE_ON_CRASH = 0x2 @@ -1780,6 +1781,7 @@ const ( KEY_SPEC_USER_KEYRING = -0x4 KEY_SPEC_USER_SESSION_KEYRING = -0x5 LANDLOCK_ACCESS_FS_EXECUTE = 0x1 + LANDLOCK_ACCESS_FS_IOCTL_DEV = 0x8000 LANDLOCK_ACCESS_FS_MAKE_BLOCK = 0x800 LANDLOCK_ACCESS_FS_MAKE_CHAR = 0x40 LANDLOCK_ACCESS_FS_MAKE_DIR = 0x80 @@ -1861,6 +1863,19 @@ const ( MAP_FILE = 0x0 MAP_FIXED = 0x10 MAP_FIXED_NOREPLACE = 0x100000 + MAP_HUGE_16GB = 0x88000000 + MAP_HUGE_16KB = 0x38000000 + 
MAP_HUGE_16MB = 0x60000000 + MAP_HUGE_1GB = 0x78000000 + MAP_HUGE_1MB = 0x50000000 + MAP_HUGE_256MB = 0x70000000 + MAP_HUGE_2GB = 0x7c000000 + MAP_HUGE_2MB = 0x54000000 + MAP_HUGE_32MB = 0x64000000 + MAP_HUGE_512KB = 0x4c000000 + MAP_HUGE_512MB = 0x74000000 + MAP_HUGE_64KB = 0x40000000 + MAP_HUGE_8MB = 0x5c000000 MAP_HUGE_MASK = 0x3f MAP_HUGE_SHIFT = 0x1a MAP_PRIVATE = 0x2 @@ -1908,6 +1923,7 @@ const ( MNT_EXPIRE = 0x4 MNT_FORCE = 0x1 MNT_ID_REQ_SIZE_VER0 = 0x18 + MNT_ID_REQ_SIZE_VER1 = 0x20 MODULE_INIT_COMPRESSED_FILE = 0x4 MODULE_INIT_IGNORE_MODVERSIONS = 0x1 MODULE_INIT_IGNORE_VERMAGIC = 0x2 @@ -2173,7 +2189,7 @@ const ( NFT_REG_SIZE = 0x10 NFT_REJECT_ICMPX_MAX = 0x3 NFT_RT_MAX = 0x4 - NFT_SECMARK_CTX_MAXLEN = 0x100 + NFT_SECMARK_CTX_MAXLEN = 0x1000 NFT_SET_MAXNAMELEN = 0x100 NFT_SOCKET_MAX = 0x3 NFT_TABLE_F_MASK = 0x7 @@ -2342,9 +2358,11 @@ const ( PERF_MEM_LVLNUM_IO = 0xa PERF_MEM_LVLNUM_L1 = 0x1 PERF_MEM_LVLNUM_L2 = 0x2 + PERF_MEM_LVLNUM_L2_MHB = 0x5 PERF_MEM_LVLNUM_L3 = 0x3 PERF_MEM_LVLNUM_L4 = 0x4 PERF_MEM_LVLNUM_LFB = 0xc + PERF_MEM_LVLNUM_MSC = 0x6 PERF_MEM_LVLNUM_NA = 0xf PERF_MEM_LVLNUM_PMEM = 0xe PERF_MEM_LVLNUM_RAM = 0xd @@ -2417,6 +2435,7 @@ const ( PRIO_PGRP = 0x1 PRIO_PROCESS = 0x0 PRIO_USER = 0x2 + PROCFS_IOCTL_MAGIC = 'f' PROC_SUPER_MAGIC = 0x9fa0 PROT_EXEC = 0x4 PROT_GROWSDOWN = 0x1000000 @@ -2498,6 +2517,23 @@ const ( PR_PAC_GET_ENABLED_KEYS = 0x3d PR_PAC_RESET_KEYS = 0x36 PR_PAC_SET_ENABLED_KEYS = 0x3c + PR_PPC_DEXCR_CTRL_CLEAR = 0x4 + PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC = 0x10 + PR_PPC_DEXCR_CTRL_EDITABLE = 0x1 + PR_PPC_DEXCR_CTRL_MASK = 0x1f + PR_PPC_DEXCR_CTRL_SET = 0x2 + PR_PPC_DEXCR_CTRL_SET_ONEXEC = 0x8 + PR_PPC_DEXCR_IBRTPD = 0x1 + PR_PPC_DEXCR_NPHIE = 0x3 + PR_PPC_DEXCR_SBHE = 0x0 + PR_PPC_DEXCR_SRAPD = 0x2 + PR_PPC_GET_DEXCR = 0x48 + PR_PPC_SET_DEXCR = 0x49 + PR_RISCV_CTX_SW_FENCEI_OFF = 0x1 + PR_RISCV_CTX_SW_FENCEI_ON = 0x0 + PR_RISCV_SCOPE_PER_PROCESS = 0x0 + PR_RISCV_SCOPE_PER_THREAD = 0x1 + PR_RISCV_SET_ICACHE_FLUSH_CTX = 0x47 
PR_RISCV_V_GET_CONTROL = 0x46 PR_RISCV_V_SET_CONTROL = 0x45 PR_RISCV_V_VSTATE_CTRL_CUR_MASK = 0x3 @@ -2902,11 +2938,12 @@ const ( RUSAGE_SELF = 0x0 RUSAGE_THREAD = 0x1 RWF_APPEND = 0x10 + RWF_ATOMIC = 0x40 RWF_DSYNC = 0x2 RWF_HIPRI = 0x1 RWF_NOAPPEND = 0x20 RWF_NOWAIT = 0x8 - RWF_SUPPORTED = 0x3f + RWF_SUPPORTED = 0x7f RWF_SYNC = 0x4 RWF_WRITE_LIFE_NOT_SET = 0x0 SCHED_BATCH = 0x3 @@ -3179,6 +3216,7 @@ const ( STATX_ATTR_MOUNT_ROOT = 0x2000 STATX_ATTR_NODUMP = 0x40 STATX_ATTR_VERITY = 0x100000 + STATX_ATTR_WRITE_ATOMIC = 0x400000 STATX_BASIC_STATS = 0x7ff STATX_BLOCKS = 0x400 STATX_BTIME = 0x800 @@ -3192,8 +3230,10 @@ const ( STATX_MTIME = 0x40 STATX_NLINK = 0x4 STATX_SIZE = 0x200 + STATX_SUBVOL = 0x8000 STATX_TYPE = 0x1 STATX_UID = 0x8 + STATX_WRITE_ATOMIC = 0x10000 STATX__RESERVED = 0x80000000 SYNC_FILE_RANGE_WAIT_AFTER = 0x4 SYNC_FILE_RANGE_WAIT_BEFORE = 0x1 @@ -3592,6 +3632,7 @@ const ( XDP_UMEM_PGOFF_COMPLETION_RING = 0x180000000 XDP_UMEM_PGOFF_FILL_RING = 0x100000000 XDP_UMEM_REG = 0x4 + XDP_UMEM_TX_METADATA_LEN = 0x4 XDP_UMEM_TX_SW_CSUM = 0x2 XDP_UMEM_UNALIGNED_CHUNK_FLAG = 0x1 XDP_USE_NEED_WAKEUP = 0x8 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go index e4bc0bd57c7..8aa6d77c018 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -151,9 +153,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git 
a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go index 689317afdbf..da428f42533 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -151,9 +153,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go index 5cca668ac30..bf45bfec78a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go index 14270508b04..71c67162b73 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 
EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 ESR_MAGIC = 0x45535201 EXTPROC = 0x10000 @@ -152,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go index 28e39afdcb4..9476628fa02 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -152,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go index cd66e92cb42..b9e85f3cf0c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 
0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go index c1595eba78e..a48b68a7647 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go index ee9456b0da7..ea00e8522a1 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go 
b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go index 8cfca81e1b5..91c64687176 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go index 60b0deb3af7..8cbf38d6390 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go index f90aa7281bf..a2df7341917 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + 
EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go index ba9e0150338..24791379233 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go index 07cdfd6e9fd..d265f146ee0 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 
NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go index 2f1dd214a74..3f2d6443964 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go index f40519d9018..5d8b727a1c8 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go @@ -82,6 +82,8 @@ const ( EFD_CLOEXEC = 0x400000 EFD_NONBLOCK = 0x4000 EMT_TAGOVF = 0x1 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x400000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -153,9 +155,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go 
b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go index da08b2ab3d9..1ec2b1407b1 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go @@ -581,6 +581,8 @@ const ( AT_EMPTY_PATH = 0x1000 AT_REMOVEDIR = 0x200 RENAME_NOREPLACE = 1 << 0 + ST_RDONLY = 1 + ST_NOSUID = 2 ) const ( diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go index 07642c308d3..24b346e1a35 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go @@ -740,6 +740,54 @@ func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func renamexNp(from string, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_renamex_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flag)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renamex_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renamex_np renamex_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_renameatx_np_trampoline_addr, uintptr(fromfd), uintptr(unsafe.Pointer(_p0)), uintptr(tofd), uintptr(unsafe.Pointer(_p1)), uintptr(flag), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renameatx_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renameatx_np renameatx_np 
"/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -793,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s index 923e08cb792..ebd213100b3 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s @@ -223,6 +223,16 @@ TEXT libc_ioctl_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_ioctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_ioctl_trampoline_addr(SB)/8, $libc_ioctl_trampoline<>(SB) +TEXT libc_renamex_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renamex_np(SB) +GLOBL ·libc_renamex_np_trampoline_addr(SB), 
RODATA, $8 +DATA ·libc_renamex_np_trampoline_addr(SB)/8, $libc_renamex_np_trampoline<>(SB) + +TEXT libc_renameatx_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renameatx_np(SB) +GLOBL ·libc_renameatx_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renameatx_np_trampoline_addr(SB)/8, $libc_renameatx_np_trampoline<>(SB) + TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sysctl(SB) GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 @@ -238,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go index 7d73dda6473..824b9c2d5e0 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go @@ -740,6 +740,54 @@ func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func renamexNp(from string, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_renamex_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flag)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renamex_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renamex_np renamex_np "/usr/lib/libSystem.B.dylib" + +// THIS 
FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_renameatx_np_trampoline_addr, uintptr(fromfd), uintptr(unsafe.Pointer(_p0)), uintptr(tofd), uintptr(unsafe.Pointer(_p1)), uintptr(flag), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renameatx_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renameatx_np renameatx_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -793,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), 
uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s index 057700111e7..4f178a22934 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s @@ -223,6 +223,16 @@ TEXT libc_ioctl_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_ioctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_ioctl_trampoline_addr(SB)/8, $libc_ioctl_trampoline<>(SB) +TEXT libc_renamex_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renamex_np(SB) +GLOBL ·libc_renamex_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renamex_np_trampoline_addr(SB)/8, $libc_renamex_np_trampoline<>(SB) + +TEXT libc_renameatx_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renameatx_np(SB) +GLOBL ·libc_renameatx_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renameatx_np_trampoline_addr(SB)/8, $libc_renameatx_np_trampoline<>(SB) + TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sysctl(SB) GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 @@ -238,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go index 87d8612a1dc..af30da55780 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go @@ -971,23 +971,6 @@ func Getpriority(which int, who int) (prio int, err error) { // 
THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Getrandom(buf []byte, flags int) (n int, err error) { - var _p0 unsafe.Pointer - if len(buf) > 0 { - _p0 = unsafe.Pointer(&buf[0]) - } else { - _p0 = unsafe.Pointer(&_zero) - } - r0, _, e1 := Syscall(SYS_GETRANDOM, uintptr(_p0), uintptr(len(buf)), uintptr(flags)) - n = int(r0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Getrusage(who int, rusage *Rusage) (err error) { _, _, e1 := RawSyscall(SYS_GETRUSAGE, uintptr(who), uintptr(unsafe.Pointer(rusage)), 0) if e1 != 0 { @@ -2229,3 +2212,19 @@ func Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) } return } + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func Mseal(b []byte, flags uint) (err error) { + var _p0 unsafe.Pointer + if len(b) > 0 { + _p0 = unsafe.Pointer(&b[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := Syscall(SYS_MSEAL, uintptr(_p0), uintptr(len(b)), uintptr(flags)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go index 9dc42410b78..1851df14e87 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var 
libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s index 41b5617316c..0b43c693656 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $4 DATA ·libc_mknodat_trampoline_addr(SB)/4, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $4 +DATA ·libc_mount_trampoline_addr(SB)/4, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $4 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go index 0d3a0751cd4..e1ec0dbe4ec 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = 
errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s index 4019a656f6d..880c6d6e316 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go index c39f7776db3..7c8452a63e9 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), 
uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s index ac4af24f908..b8ef95b0fa1 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $4 DATA ·libc_mknodat_trampoline_addr(SB)/4, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $4 +DATA ·libc_mount_trampoline_addr(SB)/4, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $4 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go index 57571d072fe..2ffdf861f75 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), 
uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s index f77d532121b..2af3b5c762f 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go index e62963e67e2..1da08d52675 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := 
syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s index fae140b62c9..b7a251353b0 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go index 00831354c82..6e85b0aac95 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = 
BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s index 9d1e0ff06d0..f15dadf0552 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s @@ -555,6 +555,12 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + CALL libc_mount(SB) + RET +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 CALL libc_nanosleep(SB) RET diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go index 79029ed5848..28b487df251 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + 
_p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s index da115f9a4b6..1e7f321e436 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go index 53aef5dc58d..524b0820cbc 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go @@ -457,4 +457,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go index 71d524763d3..f485dbf4565 100644 
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go @@ -341,6 +341,7 @@ const ( SYS_STATX = 332 SYS_IO_PGETEVENTS = 333 SYS_RSEQ = 334 + SYS_URETPROBE = 335 SYS_PIDFD_SEND_SIGNAL = 424 SYS_IO_URING_SETUP = 425 SYS_IO_URING_ENTER = 426 @@ -379,4 +380,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go index c747706131c..70b35bf3b09 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go @@ -421,4 +421,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go index f96e214f6d4..1893e2fe884 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go @@ -85,7 +85,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 @@ -324,4 +324,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go index 28425346cf1..16a4017da0a 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go @@ -84,6 +84,8 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 + SYS_NEWFSTATAT = 79 + SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 SYS_FDATASYNC = 83 @@ -318,4 +320,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git 
a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go index d0953018dae..7e567f1efff 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go @@ -441,4 +441,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 4459 SYS_LSM_SET_SELF_ATTR = 4460 SYS_LSM_LIST_MODULES = 4461 + SYS_MSEAL = 4462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go index 295c7f4b818..38ae55e5ef8 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go @@ -371,4 +371,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 5459 SYS_LSM_SET_SELF_ATTR = 5460 SYS_LSM_LIST_MODULES = 5461 + SYS_MSEAL = 5462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go index d1a9eaca7a4..55e92e60a82 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go @@ -371,4 +371,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 5459 SYS_LSM_SET_SELF_ATTR = 5460 SYS_LSM_LIST_MODULES = 5461 + SYS_MSEAL = 5462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go index bec157c39fd..60658d6a021 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go @@ -441,4 +441,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 4459 SYS_LSM_SET_SELF_ATTR = 4460 SYS_LSM_LIST_MODULES = 4461 + SYS_MSEAL = 4462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go index 7ee7bdc435c..e203e8a7ed4 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go @@ -448,4 +448,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 
SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go index fad1f25b449..5944b97d546 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go @@ -420,4 +420,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go index 7d3e16357d6..c66d416dad1 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go @@ -420,4 +420,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go index 0ed53ad9f7e..a5459e766f5 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go @@ -84,7 +84,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 @@ -325,4 +325,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go index 2fba04ad500..01d86825bb9 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go @@ -386,4 +386,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go index 621d00d741b..7b703e77cda 100644 
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go @@ -399,4 +399,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go index 091d107f3a5..d003c3d4378 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go index 28ff4ef74d0..0d45a941aae 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go index 6cbd094a3aa..51e13eb055f 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go @@ -625,6 +625,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go index 7c03b6ee77f..d002d8ef3cc 100644 --- 
a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go @@ -630,6 +630,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go index 422107ee8b1..3f863d898dd 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go @@ -616,6 +616,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go index 505a12acfd9..61c72931066 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go @@ -610,6 +610,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go index cc986c79006..b5d17414f03 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go @@ -612,6 +612,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index 4740b834854..3a69e454962 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -87,30 +87,35 @@ type StatxTimestamp struct { } type Statx_t struct { - Mask uint32 - Blksize uint32 - Attributes uint64 - Nlink uint32 - Uid uint32 - Gid uint32 - Mode uint16 - _ [1]uint16 - Ino uint64 - Size uint64 - Blocks uint64 - Attributes_mask uint64 - Atime StatxTimestamp - Btime 
StatxTimestamp - Ctime StatxTimestamp - Mtime StatxTimestamp - Rdev_major uint32 - Rdev_minor uint32 - Dev_major uint32 - Dev_minor uint32 - Mnt_id uint64 - Dio_mem_align uint32 - Dio_offset_align uint32 - _ [12]uint64 + Mask uint32 + Blksize uint32 + Attributes uint64 + Nlink uint32 + Uid uint32 + Gid uint32 + Mode uint16 + _ [1]uint16 + Ino uint64 + Size uint64 + Blocks uint64 + Attributes_mask uint64 + Atime StatxTimestamp + Btime StatxTimestamp + Ctime StatxTimestamp + Mtime StatxTimestamp + Rdev_major uint32 + Rdev_minor uint32 + Dev_major uint32 + Dev_minor uint32 + Mnt_id uint64 + Dio_mem_align uint32 + Dio_offset_align uint32 + Subvol uint64 + Atomic_write_unit_min uint32 + Atomic_write_unit_max uint32 + Atomic_write_segments_max uint32 + _ [1]uint32 + _ [9]uint64 } type Fsid struct { @@ -515,6 +520,29 @@ type TCPInfo struct { Total_rto_time uint32 } +type TCPVegasInfo struct { + Enabled uint32 + Rttcnt uint32 + Rtt uint32 + Minrtt uint32 +} + +type TCPDCTCPInfo struct { + Enabled uint16 + Ce_state uint16 + Alpha uint32 + Ab_ecn uint32 + Ab_tot uint32 +} + +type TCPBBRInfo struct { + Bw_lo uint32 + Bw_hi uint32 + Min_rtt uint32 + Pacing_gain uint32 + Cwnd_gain uint32 +} + type CanFilter struct { Id uint32 Mask uint32 @@ -556,6 +584,7 @@ const ( SizeofICMPv6Filter = 0x20 SizeofUcred = 0xc SizeofTCPInfo = 0xf8 + SizeofTCPCCInfo = 0x14 SizeofCanFilter = 0x8 SizeofTCPRepairOpt = 0x8 ) @@ -2485,7 +2514,7 @@ type XDPMmapOffsets struct { type XDPUmemReg struct { Addr uint64 Len uint64 - Chunk_size uint32 + Size uint32 Headroom uint32 Flags uint32 Tx_metadata_len uint32 @@ -3473,7 +3502,7 @@ const ( DEVLINK_PORT_FN_ATTR_STATE = 0x2 DEVLINK_PORT_FN_ATTR_OPSTATE = 0x3 DEVLINK_PORT_FN_ATTR_CAPS = 0x4 - DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x5 + DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x6 ) type FsverityDigest struct { @@ -3765,7 +3794,7 @@ const ( ETHTOOL_MSG_PSE_GET = 0x24 ETHTOOL_MSG_PSE_SET = 0x25 ETHTOOL_MSG_RSS_GET = 0x26 - ETHTOOL_MSG_USER_MAX = 0x2b + ETHTOOL_MSG_USER_MAX 
= 0x2c ETHTOOL_MSG_KERNEL_NONE = 0x0 ETHTOOL_MSG_STRSET_GET_REPLY = 0x1 ETHTOOL_MSG_LINKINFO_GET_REPLY = 0x2 @@ -3805,7 +3834,10 @@ const ( ETHTOOL_MSG_MODULE_NTF = 0x24 ETHTOOL_MSG_PSE_GET_REPLY = 0x25 ETHTOOL_MSG_RSS_GET_REPLY = 0x26 - ETHTOOL_MSG_KERNEL_MAX = 0x2b + ETHTOOL_MSG_KERNEL_MAX = 0x2c + ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 + ETHTOOL_FLAG_OMIT_REPLY = 0x2 + ETHTOOL_FLAG_STATS = 0x4 ETHTOOL_A_HEADER_UNSPEC = 0x0 ETHTOOL_A_HEADER_DEV_INDEX = 0x1 ETHTOOL_A_HEADER_DEV_NAME = 0x2 @@ -3947,7 +3979,7 @@ const ( ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL = 0x17 ETHTOOL_A_COALESCE_USE_CQE_MODE_TX = 0x18 ETHTOOL_A_COALESCE_USE_CQE_MODE_RX = 0x19 - ETHTOOL_A_COALESCE_MAX = 0x1c + ETHTOOL_A_COALESCE_MAX = 0x1e ETHTOOL_A_PAUSE_UNSPEC = 0x0 ETHTOOL_A_PAUSE_HEADER = 0x1 ETHTOOL_A_PAUSE_AUTONEG = 0x2 @@ -3975,7 +4007,7 @@ const ( ETHTOOL_A_TSINFO_TX_TYPES = 0x3 ETHTOOL_A_TSINFO_RX_FILTERS = 0x4 ETHTOOL_A_TSINFO_PHC_INDEX = 0x5 - ETHTOOL_A_TSINFO_MAX = 0x5 + ETHTOOL_A_TSINFO_MAX = 0x6 ETHTOOL_A_CABLE_TEST_UNSPEC = 0x0 ETHTOOL_A_CABLE_TEST_HEADER = 0x1 ETHTOOL_A_CABLE_TEST_MAX = 0x1 @@ -4605,7 +4637,7 @@ const ( NL80211_ATTR_MAC_HINT = 0xc8 NL80211_ATTR_MAC_MASK = 0xd7 NL80211_ATTR_MAX_AP_ASSOC_STA = 0xca - NL80211_ATTR_MAX = 0x14a + NL80211_ATTR_MAX = 0x14c NL80211_ATTR_MAX_CRIT_PROT_DURATION = 0xb4 NL80211_ATTR_MAX_CSA_COUNTERS = 0xce NL80211_ATTR_MAX_MATCH_SETS = 0x85 @@ -5209,7 +5241,7 @@ const ( NL80211_FREQUENCY_ATTR_GO_CONCURRENT = 0xf NL80211_FREQUENCY_ATTR_INDOOR_ONLY = 0xe NL80211_FREQUENCY_ATTR_IR_CONCURRENT = 0xf - NL80211_FREQUENCY_ATTR_MAX = 0x20 + NL80211_FREQUENCY_ATTR_MAX = 0x21 NL80211_FREQUENCY_ATTR_MAX_TX_POWER = 0x6 NL80211_FREQUENCY_ATTR_NO_10MHZ = 0x11 NL80211_FREQUENCY_ATTR_NO_160MHZ = 0xc diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go index 15adc04142f..ad05b51a603 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go +++ 
b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go @@ -727,6 +727,37 @@ const ( RISCV_HWPROBE_EXT_ZBA = 0x8 RISCV_HWPROBE_EXT_ZBB = 0x10 RISCV_HWPROBE_EXT_ZBS = 0x20 + RISCV_HWPROBE_EXT_ZICBOZ = 0x40 + RISCV_HWPROBE_EXT_ZBC = 0x80 + RISCV_HWPROBE_EXT_ZBKB = 0x100 + RISCV_HWPROBE_EXT_ZBKC = 0x200 + RISCV_HWPROBE_EXT_ZBKX = 0x400 + RISCV_HWPROBE_EXT_ZKND = 0x800 + RISCV_HWPROBE_EXT_ZKNE = 0x1000 + RISCV_HWPROBE_EXT_ZKNH = 0x2000 + RISCV_HWPROBE_EXT_ZKSED = 0x4000 + RISCV_HWPROBE_EXT_ZKSH = 0x8000 + RISCV_HWPROBE_EXT_ZKT = 0x10000 + RISCV_HWPROBE_EXT_ZVBB = 0x20000 + RISCV_HWPROBE_EXT_ZVBC = 0x40000 + RISCV_HWPROBE_EXT_ZVKB = 0x80000 + RISCV_HWPROBE_EXT_ZVKG = 0x100000 + RISCV_HWPROBE_EXT_ZVKNED = 0x200000 + RISCV_HWPROBE_EXT_ZVKNHA = 0x400000 + RISCV_HWPROBE_EXT_ZVKNHB = 0x800000 + RISCV_HWPROBE_EXT_ZVKSED = 0x1000000 + RISCV_HWPROBE_EXT_ZVKSH = 0x2000000 + RISCV_HWPROBE_EXT_ZVKT = 0x4000000 + RISCV_HWPROBE_EXT_ZFH = 0x8000000 + RISCV_HWPROBE_EXT_ZFHMIN = 0x10000000 + RISCV_HWPROBE_EXT_ZIHINTNTL = 0x20000000 + RISCV_HWPROBE_EXT_ZVFH = 0x40000000 + RISCV_HWPROBE_EXT_ZVFHMIN = 0x80000000 + RISCV_HWPROBE_EXT_ZFA = 0x100000000 + RISCV_HWPROBE_EXT_ZTSO = 0x200000000 + RISCV_HWPROBE_EXT_ZACAS = 0x400000000 + RISCV_HWPROBE_EXT_ZICOND = 0x800000000 + RISCV_HWPROBE_EXT_ZIHINTPAUSE = 0x1000000000 RISCV_HWPROBE_KEY_CPUPERF_0 = 0x5 RISCV_HWPROBE_MISALIGNED_UNKNOWN = 0x0 RISCV_HWPROBE_MISALIGNED_EMULATED = 0x1 @@ -734,4 +765,6 @@ const ( RISCV_HWPROBE_MISALIGNED_FAST = 0x3 RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4 RISCV_HWPROBE_MISALIGNED_MASK = 0x7 + RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6 + RISCV_HWPROBE_WHICH_CPUS = 0x1 ) diff --git a/vendor/golang.org/x/sys/windows/dll_windows.go b/vendor/golang.org/x/sys/windows/dll_windows.go index 115341fba66..4e613cf6335 100644 --- a/vendor/golang.org/x/sys/windows/dll_windows.go +++ b/vendor/golang.org/x/sys/windows/dll_windows.go @@ -65,7 +65,7 @@ func LoadDLL(name string) (dll *DLL, err error) { return d, nil } -// MustLoadDLL 
is like LoadDLL but panics if load operation failes. +// MustLoadDLL is like LoadDLL but panics if load operation fails. func MustLoadDLL(name string) *DLL { d, e := LoadDLL(name) if e != nil { diff --git a/vendor/golang.org/x/sys/windows/security_windows.go b/vendor/golang.org/x/sys/windows/security_windows.go index 97651b5bd04..b6e1ab76f82 100644 --- a/vendor/golang.org/x/sys/windows/security_windows.go +++ b/vendor/golang.org/x/sys/windows/security_windows.go @@ -1179,7 +1179,7 @@ type OBJECTS_AND_NAME struct { //sys makeSelfRelativeSD(absoluteSD *SECURITY_DESCRIPTOR, selfRelativeSD *SECURITY_DESCRIPTOR, selfRelativeSDSize *uint32) (err error) = advapi32.MakeSelfRelativeSD //sys setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCESS, oldACL *ACL, newACL **ACL) (ret error) = advapi32.SetEntriesInAclW -//sys GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (ret error) = advapi32.GetAce +//sys GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) = advapi32.GetAce // Control returns the security descriptor control bits. 
func (sd *SECURITY_DESCRIPTOR) Control() (control SECURITY_DESCRIPTOR_CONTROL, revision uint32, err error) { diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index 6525c62f3c2..5cee9a3143f 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -17,8 +17,10 @@ import ( "unsafe" ) -type Handle uintptr -type HWND uintptr +type ( + Handle uintptr + HWND uintptr +) const ( InvalidHandle = ^Handle(0) @@ -211,6 +213,10 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys OpenProcess(desiredAccess uint32, inheritHandle bool, processId uint32) (handle Handle, err error) //sys ShellExecute(hwnd Handle, verb *uint16, file *uint16, args *uint16, cwd *uint16, showCmd int32) (err error) [failretval<=32] = shell32.ShellExecuteW //sys GetWindowThreadProcessId(hwnd HWND, pid *uint32) (tid uint32, err error) = user32.GetWindowThreadProcessId +//sys LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) [failretval==0] = user32.LoadKeyboardLayoutW +//sys UnloadKeyboardLayout(hkl Handle) (err error) = user32.UnloadKeyboardLayout +//sys GetKeyboardLayout(tid uint32) (hkl Handle) = user32.GetKeyboardLayout +//sys ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) = user32.ToUnicodeEx //sys GetShellWindow() (shellWindow HWND) = user32.GetShellWindow //sys MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) [failretval==0] = user32.MessageBoxW //sys ExitWindowsEx(flags uint32, reason uint32) (err error) = user32.ExitWindowsEx @@ -307,6 +313,10 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys SetConsoleMode(console Handle, mode uint32) (err error) = kernel32.SetConsoleMode //sys GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) = kernel32.GetConsoleScreenBufferInfo //sys 
setConsoleCursorPosition(console Handle, position uint32) (err error) = kernel32.SetConsoleCursorPosition +//sys GetConsoleCP() (cp uint32, err error) = kernel32.GetConsoleCP +//sys GetConsoleOutputCP() (cp uint32, err error) = kernel32.GetConsoleOutputCP +//sys SetConsoleCP(cp uint32) (err error) = kernel32.SetConsoleCP +//sys SetConsoleOutputCP(cp uint32) (err error) = kernel32.SetConsoleOutputCP //sys WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) = kernel32.WriteConsoleW //sys ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) = kernel32.ReadConsoleW //sys resizePseudoConsole(pconsole Handle, size uint32) (hr error) = kernel32.ResizePseudoConsole @@ -1368,9 +1378,11 @@ func SetsockoptLinger(fd Handle, level, opt int, l *Linger) (err error) { func SetsockoptInet4Addr(fd Handle, level, opt int, value [4]byte) (err error) { return Setsockopt(fd, int32(level), int32(opt), (*byte)(unsafe.Pointer(&value[0])), 4) } + func SetsockoptIPMreq(fd Handle, level, opt int, mreq *IPMreq) (err error) { return Setsockopt(fd, int32(level), int32(opt), (*byte)(unsafe.Pointer(mreq)), int32(unsafe.Sizeof(*mreq))) } + func SetsockoptIPv6Mreq(fd Handle, level, opt int, mreq *IPv6Mreq) (err error) { return syscall.EWINDOWS } diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go index d8cb71db0a6..7b97a154c95 100644 --- a/vendor/golang.org/x/sys/windows/types_windows.go +++ b/vendor/golang.org/x/sys/windows/types_windows.go @@ -1060,6 +1060,7 @@ const ( SIO_GET_EXTENSION_FUNCTION_POINTER = IOC_INOUT | IOC_WS2 | 6 SIO_KEEPALIVE_VALS = IOC_IN | IOC_VENDOR | 4 SIO_UDP_CONNRESET = IOC_IN | IOC_VENDOR | 12 + SIO_UDP_NETRESET = IOC_IN | IOC_VENDOR | 15 // cf. 
http://support.microsoft.com/default.aspx?scid=kb;en-us;257460 @@ -2003,7 +2004,21 @@ const ( MOVEFILE_FAIL_IF_NOT_TRACKABLE = 0x20 ) -const GAA_FLAG_INCLUDE_PREFIX = 0x00000010 +// Flags for GetAdaptersAddresses, see +// https://learn.microsoft.com/en-us/windows/win32/api/iphlpapi/nf-iphlpapi-getadaptersaddresses. +const ( + GAA_FLAG_SKIP_UNICAST = 0x1 + GAA_FLAG_SKIP_ANYCAST = 0x2 + GAA_FLAG_SKIP_MULTICAST = 0x4 + GAA_FLAG_SKIP_DNS_SERVER = 0x8 + GAA_FLAG_INCLUDE_PREFIX = 0x10 + GAA_FLAG_SKIP_FRIENDLY_NAME = 0x20 + GAA_FLAG_INCLUDE_WINS_INFO = 0x40 + GAA_FLAG_INCLUDE_GATEWAYS = 0x80 + GAA_FLAG_INCLUDE_ALL_INTERFACES = 0x100 + GAA_FLAG_INCLUDE_ALL_COMPARTMENTS = 0x200 + GAA_FLAG_INCLUDE_TUNNEL_BINDINGORDER = 0x400 +) const ( IF_TYPE_OTHER = 1 @@ -2017,6 +2032,50 @@ const ( IF_TYPE_IEEE1394 = 144 ) +// Enum NL_PREFIX_ORIGIN for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_prefix_origin +const ( + IpPrefixOriginOther = 0 + IpPrefixOriginManual = 1 + IpPrefixOriginWellKnown = 2 + IpPrefixOriginDhcp = 3 + IpPrefixOriginRouterAdvertisement = 4 + IpPrefixOriginUnchanged = 1 << 4 +) + +// Enum NL_SUFFIX_ORIGIN for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_suffix_origin +const ( + NlsoOther = 0 + NlsoManual = 1 + NlsoWellKnown = 2 + NlsoDhcp = 3 + NlsoLinkLayerAddress = 4 + NlsoRandom = 5 + IpSuffixOriginOther = 0 + IpSuffixOriginManual = 1 + IpSuffixOriginWellKnown = 2 + IpSuffixOriginDhcp = 3 + IpSuffixOriginLinkLayerAddress = 4 + IpSuffixOriginRandom = 5 + IpSuffixOriginUnchanged = 1 << 4 +) + +// Enum NL_DAD_STATE for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_dad_state +const ( + NldsInvalid = 0 + NldsTentative = 1 + NldsDuplicate = 2 + NldsDeprecated = 3 + NldsPreferred = 4 + IpDadStateInvalid = 0 + IpDadStateTentative = 1 + IpDadStateDuplicate = 2 + IpDadStateDeprecated = 3 + 
IpDadStatePreferred = 4 +) + type SocketAddress struct { Sockaddr *syscall.RawSockaddrAny SockaddrLength int32 @@ -3404,3 +3463,14 @@ type DCB struct { EvtChar byte wReserved1 uint16 } + +// Keyboard Layout Flags. +// See https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-loadkeyboardlayoutw +const ( + KLF_ACTIVATE = 0x00000001 + KLF_SUBSTITUTE_OK = 0x00000002 + KLF_REORDER = 0x00000008 + KLF_REPLACELANG = 0x00000010 + KLF_NOTELLSHELL = 0x00000080 + KLF_SETFORPROCESS = 0x00000100 +) diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index eba761018aa..4c2e1bdc01e 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -247,7 +247,9 @@ var ( procGetCommandLineW = modkernel32.NewProc("GetCommandLineW") procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW") procGetComputerNameW = modkernel32.NewProc("GetComputerNameW") + procGetConsoleCP = modkernel32.NewProc("GetConsoleCP") procGetConsoleMode = modkernel32.NewProc("GetConsoleMode") + procGetConsoleOutputCP = modkernel32.NewProc("GetConsoleOutputCP") procGetConsoleScreenBufferInfo = modkernel32.NewProc("GetConsoleScreenBufferInfo") procGetCurrentDirectoryW = modkernel32.NewProc("GetCurrentDirectoryW") procGetCurrentProcessId = modkernel32.NewProc("GetCurrentProcessId") @@ -347,8 +349,10 @@ var ( procSetCommMask = modkernel32.NewProc("SetCommMask") procSetCommState = modkernel32.NewProc("SetCommState") procSetCommTimeouts = modkernel32.NewProc("SetCommTimeouts") + procSetConsoleCP = modkernel32.NewProc("SetConsoleCP") procSetConsoleCursorPosition = modkernel32.NewProc("SetConsoleCursorPosition") procSetConsoleMode = modkernel32.NewProc("SetConsoleMode") + procSetConsoleOutputCP = modkernel32.NewProc("SetConsoleOutputCP") procSetCurrentDirectoryW = modkernel32.NewProc("SetCurrentDirectoryW") procSetDefaultDllDirectories = 
modkernel32.NewProc("SetDefaultDllDirectories") procSetDllDirectoryW = modkernel32.NewProc("SetDllDirectoryW") @@ -478,12 +482,16 @@ var ( procGetDesktopWindow = moduser32.NewProc("GetDesktopWindow") procGetForegroundWindow = moduser32.NewProc("GetForegroundWindow") procGetGUIThreadInfo = moduser32.NewProc("GetGUIThreadInfo") + procGetKeyboardLayout = moduser32.NewProc("GetKeyboardLayout") procGetShellWindow = moduser32.NewProc("GetShellWindow") procGetWindowThreadProcessId = moduser32.NewProc("GetWindowThreadProcessId") procIsWindow = moduser32.NewProc("IsWindow") procIsWindowUnicode = moduser32.NewProc("IsWindowUnicode") procIsWindowVisible = moduser32.NewProc("IsWindowVisible") + procLoadKeyboardLayoutW = moduser32.NewProc("LoadKeyboardLayoutW") procMessageBoxW = moduser32.NewProc("MessageBoxW") + procToUnicodeEx = moduser32.NewProc("ToUnicodeEx") + procUnloadKeyboardLayout = moduser32.NewProc("UnloadKeyboardLayout") procCreateEnvironmentBlock = moduserenv.NewProc("CreateEnvironmentBlock") procDestroyEnvironmentBlock = moduserenv.NewProc("DestroyEnvironmentBlock") procGetUserProfileDirectoryW = moduserenv.NewProc("GetUserProfileDirectoryW") @@ -789,6 +797,14 @@ func FreeSid(sid *SID) (err error) { return } +func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) { + r1, _, e1 := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce))) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func GetLengthSid(sid *SID) (len uint32) { r0, _, _ := syscall.Syscall(procGetLengthSid.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0) len = uint32(r0) @@ -1225,14 +1241,6 @@ func setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCE return } -func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (ret error) { - r0, _, _ := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce))) - if r0 == 0 { - ret = 
GetLastError() - } - return -} - func SetKernelObjectSecurity(handle Handle, securityInformation SECURITY_INFORMATION, securityDescriptor *SECURITY_DESCRIPTOR) (err error) { r1, _, e1 := syscall.Syscall(procSetKernelObjectSecurity.Addr(), 3, uintptr(handle), uintptr(securityInformation), uintptr(unsafe.Pointer(securityDescriptor))) if r1 == 0 { @@ -2158,6 +2166,15 @@ func GetComputerName(buf *uint16, n *uint32) (err error) { return } +func GetConsoleCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleMode(console Handle, mode *uint32) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(mode)), 0) if r1 == 0 { @@ -2166,6 +2183,15 @@ func GetConsoleMode(console Handle, mode *uint32) (err error) { return } +func GetConsoleOutputCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleOutputCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(info)), 0) if r1 == 0 { @@ -3034,6 +3060,14 @@ func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { return } +func SetConsoleCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func setConsoleCursorPosition(console Handle, position uint32) (err error) { r1, _, e1 := syscall.Syscall(procSetConsoleCursorPosition.Addr(), 2, uintptr(console), uintptr(position), 0) if r1 == 0 { @@ -3050,6 +3084,14 @@ func SetConsoleMode(console Handle, mode uint32) (err error) { return } +func SetConsoleOutputCP(cp uint32) (err error) { + r1, _, e1 := 
syscall.Syscall(procSetConsoleOutputCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func SetCurrentDirectory(path *uint16) (err error) { r1, _, e1 := syscall.Syscall(procSetCurrentDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) if r1 == 0 { @@ -4082,6 +4124,12 @@ func GetGUIThreadInfo(thread uint32, info *GUIThreadInfo) (err error) { return } +func GetKeyboardLayout(tid uint32) (hkl Handle) { + r0, _, _ := syscall.Syscall(procGetKeyboardLayout.Addr(), 1, uintptr(tid), 0, 0) + hkl = Handle(r0) + return +} + func GetShellWindow() (shellWindow HWND) { r0, _, _ := syscall.Syscall(procGetShellWindow.Addr(), 0, 0, 0, 0) shellWindow = HWND(r0) @@ -4115,6 +4163,15 @@ func IsWindowVisible(hwnd HWND) (isVisible bool) { return } +func LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) { + r0, _, e1 := syscall.Syscall(procLoadKeyboardLayoutW.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(flags), 0) + hkl = Handle(r0) + if hkl == 0 { + err = errnoErr(e1) + } + return +} + func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) { r0, _, e1 := syscall.Syscall6(procMessageBoxW.Addr(), 4, uintptr(hwnd), uintptr(unsafe.Pointer(text)), uintptr(unsafe.Pointer(caption)), uintptr(boxtype), 0, 0) ret = int32(r0) @@ -4124,6 +4181,20 @@ func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret i return } +func ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) { + r0, _, _ := syscall.Syscall9(procToUnicodeEx.Addr(), 7, uintptr(vkey), uintptr(scancode), uintptr(unsafe.Pointer(keystate)), uintptr(unsafe.Pointer(pwszBuff)), uintptr(cchBuff), uintptr(flags), uintptr(hkl), 0, 0) + ret = int32(r0) + return +} + +func UnloadKeyboardLayout(hkl Handle) (err error) { + r1, _, e1 := syscall.Syscall(procUnloadKeyboardLayout.Addr(), 1, uintptr(hkl), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + 
} + return +} + func CreateEnvironmentBlock(block **uint16, token Token, inheritExisting bool) (err error) { var _p0 uint32 if inheritExisting { diff --git a/vendor/modules.txt b/vendor/modules.txt index 1fa09e2acba..7db16486fe7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -137,7 +137,7 @@ github.com/alicebob/miniredis/v2/geohash github.com/alicebob/miniredis/v2/hyperloglog github.com/alicebob/miniredis/v2/metro github.com/alicebob/miniredis/v2/server -# github.com/andybalholm/brotli v1.1.0 +# github.com/andybalholm/brotli v1.1.1 ## explicit; go 1.13 github.com/andybalholm/brotli github.com/andybalholm/brotli/matchfinder @@ -758,8 +758,8 @@ github.com/jsternberg/zap-logfmt # github.com/julienschmidt/httprouter v1.3.0 ## explicit; go 1.7 github.com/julienschmidt/httprouter -# github.com/klauspost/compress v1.17.9 -## explicit; go 1.20 +# github.com/klauspost/compress v1.17.11 +## explicit; go 1.21 github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse @@ -807,7 +807,7 @@ github.com/mattn/go-colorable # github.com/mattn/go-isatty v0.0.20 ## explicit; go 1.15 github.com/mattn/go-isatty -# github.com/mattn/go-runewidth v0.0.15 +# github.com/mattn/go-runewidth v0.0.16 ## explicit; go 1.9 github.com/mattn/go-runewidth # github.com/miekg/dns v1.1.61 @@ -980,8 +980,8 @@ github.com/opentracing/opentracing-go/log github.com/openzipkin/zipkin-go/model github.com/openzipkin/zipkin-go/proto/zipkin_proto3 github.com/openzipkin/zipkin-go/reporter -# github.com/parquet-go/parquet-go v0.23.0 -## explicit; go 1.21 +# github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe +## explicit; go 1.22 github.com/parquet-go/parquet-go github.com/parquet-go/parquet-go/bloom github.com/parquet-go/parquet-go/bloom/xxhash @@ -999,6 +999,7 @@ github.com/parquet-go/parquet-go/encoding/bytestreamsplit github.com/parquet-go/parquet-go/encoding/delta github.com/parquet-go/parquet-go/encoding/plain 
github.com/parquet-go/parquet-go/encoding/rle +github.com/parquet-go/parquet-go/encoding/thrift github.com/parquet-go/parquet-go/format github.com/parquet-go/parquet-go/hashprobe github.com/parquet-go/parquet-go/hashprobe/aeshash @@ -1157,9 +1158,6 @@ github.com/sagikazarmark/slog-shim # github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 ## explicit github.com/sean-/seed -# github.com/segmentio/encoding v0.4.0 -## explicit; go 1.18 -github.com/segmentio/encoding/thrift # github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e ## explicit github.com/segmentio/fasthash/fnv1a @@ -1532,7 +1530,7 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp/internal/semconvut # go.opentelemetry.io/contrib/propagators/b3 v1.27.0 ## explicit; go 1.21 go.opentelemetry.io/contrib/propagators/b3 -# go.opentelemetry.io/otel v1.30.0 +# go.opentelemetry.io/otel v1.31.0 ## explicit; go 1.22 go.opentelemetry.io/otel go.opentelemetry.io/otel/attribute @@ -1622,13 +1620,13 @@ go.opentelemetry.io/otel/exporters/stdout/stdouttrace go.opentelemetry.io/otel/log go.opentelemetry.io/otel/log/embedded go.opentelemetry.io/otel/log/noop -# go.opentelemetry.io/otel/metric v1.30.0 +# go.opentelemetry.io/otel/metric v1.31.0 ## explicit; go 1.22 go.opentelemetry.io/otel/metric go.opentelemetry.io/otel/metric/embedded go.opentelemetry.io/otel/metric/noop -# go.opentelemetry.io/otel/sdk v1.28.0 -## explicit; go 1.21 +# go.opentelemetry.io/otel/sdk v1.31.0 +## explicit; go 1.22 go.opentelemetry.io/otel/sdk go.opentelemetry.io/otel/sdk/instrumentation go.opentelemetry.io/otel/sdk/internal/env @@ -1647,7 +1645,7 @@ go.opentelemetry.io/otel/sdk/metric/internal/aggregate go.opentelemetry.io/otel/sdk/metric/internal/exemplar go.opentelemetry.io/otel/sdk/metric/internal/x go.opentelemetry.io/otel/sdk/metric/metricdata -# go.opentelemetry.io/otel/trace v1.30.0 +# go.opentelemetry.io/otel/trace v1.31.0 ## explicit; go 1.22 go.opentelemetry.io/otel/trace 
go.opentelemetry.io/otel/trace/embedded @@ -1748,7 +1746,7 @@ golang.org/x/oauth2/jwt ## explicit; go 1.18 golang.org/x/sync/errgroup golang.org/x/sync/semaphore -# golang.org/x/sys v0.22.0 +# golang.org/x/sys v0.26.0 ## explicit; go 1.18 golang.org/x/sys/cpu golang.org/x/sys/unix