Skip to content

Commit

Permalink
grpc: FileMatch: tweak file_name to be bytes instead of string
Browse files Browse the repository at this point in the history
In gRPC, all `string` fields must contain valid UTF-8. However, there is no guarantee that the file paths we receive (from Sourcegraph or another source) are valid UTF-8. See https://cloudlogging.app.goo.gl/aWr5in5axbVWGntd6 for an example of this error on sourcegraph.com:

```
{
  "jsonPayload": {
    "Caller": "internalerrs/logging.go:236",
    "Body": "grpc: error while marshaling: string field contains invalid UTF-8",
    "Resource": {
      "service.name": "zoekt-webserver",
      "service.instance.id": "indexed-search-15"
    },
    "Attributes": {
      "initialRequestJSON": "{\"query\":{\"Query\":{\"And\":{\"children\":[{\"Query\":{\"Substring\":{\"pattern\":\"typeof window\"}}},{\"Query\":{\"Branch\":{\"pattern\":\"HEAD\",\"exact\":true}}},{\"Query\":{\"RawConfig\":{\"flags\":[1,8,32]}}}]}}},\"opts\":{\"shard_max_match_count\":99999999,\"total_max_match_count\":99999999,\"max_wall_time\":{\"seconds\":54,\"nanos\":901824329},\"flush_wall_time\":{\"seconds\":1},\"max_doc_display_count\":99999999,\"chunk_matches\":true,\"use_document_ranks\":true,\"document_ranks_weight\":4500}}",
      "messageJSON": "{\"stats\":{\"duration\":{},\"wait\":{},\"match_tree_construction\":{},\"match_tree_search\":{}},\"progress\":{\"priority\":27,\"max_pending_priority\":101},\"files\":[{\"score\":774.0376017917057,\"file_name\":\"Ch1/1-8\\ufffd\\ufffd\\ufffd\\ufffd\\ufffd\\ufffdͻ/lib/prototype.js\",\"repository\":\"github.com/cssrain/Sharp-jQuery-Source\",\"branches\":[\"HEAD\"],\"chunk_matches\":[{\"content\":\"ICAgICAgaWYgKHR5cGVvZiB3aW5kb3cuSFRNTERpdkVsZW1lbnQgIT09ICd1bmRlZmluZWQnKQ==\",\"content_start\":{\"byte_offset\":1102,\"line_number\":35,\"column\":1},\"ranges\":[{\"start\":{\"byte_offset\":1112,\"line_number\":35,\"column\":11},\"end\":{\"byte_offset\":1125,\"line_number\":35,\"column\":24}}],\"score\":501},{\"content\":\"ICAgIGlmICh0eXBlb2Ygd2luZG93LkVsZW1lbnQgIT0gJ3VuZGVmaW5lZCcpIHs=\",\"content_start\":{\"byte_offset\":78805,\"line_number\":2887,\"column\":1},\"ranges\":[{\"start\":{\"byte_offset\":78813,\"line_number\":2887,\"column\":9},\"end\":{\"byte_offset\":78826,\"line_number\":2887,\"column\":22}}],\"score\":500.5}],\"repository_id\":35867662,\"repository_priority\":27,\"checksum\":\"N50W...(truncated 1324 bytes)",
      "grpcCode": "Internal",
      "nonUTF8StringFields": [
        "files[0].file_name"
      ],
      "grpcService": "grpc.v1.WebserverService",
      "grpcMethod": "StreamSearch"
    },
}
```
  • Loading branch information
ggilmore committed Aug 24, 2023
1 parent f75df3d commit 749ad46
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 8 deletions.
4 changes: 2 additions & 2 deletions api_proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func FileMatchFromProto(p *proto.FileMatch) FileMatch {
return FileMatch{
Score: p.GetScore(),
Debug: p.GetDebug(),
FileName: p.GetFileName(),
FileName: string(p.GetFileName()), // Note: 🚨Warning, this filename may be a non-UTF8 string.
Repository: p.GetRepository(),
Branches: p.GetBranches(),
LineMatches: lineMatches,
Expand Down Expand Up @@ -67,7 +67,7 @@ func (m *FileMatch) ToProto() *proto.FileMatch {
return &proto.FileMatch{
Score: m.Score,
Debug: m.Debug,
FileName: m.FileName,
FileName: []byte(m.FileName),
Repository: m.Repository,
Branches: m.Branches,
LineMatches: lineMatches,
Expand Down
10 changes: 10 additions & 0 deletions gen-proto.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Runs `buf generate` in every directory (below this script's directory) that
# contains a buf.gen.yaml file.
#
# Usage: ./gen-proto.sh
# Requires the `buf` CLI to be available on PATH.

# Enable strict mode before doing anything else so that a failing `cd` aborts
# the script instead of running `find` from an unexpected directory.
set -euo pipefail

cd "$(dirname "${BASH_SOURCE[0]}")"

# `find .` emits paths prefixed with "./", and -path matches the whole path, so
# a bare ".git" pattern never excludes anything. Prune ./.git explicitly so the
# git metadata directory is neither traversed nor matched.
#
# NUL-delimited output plus `IFS= read -r -d ''` keeps paths containing
# whitespace or newlines intact.
find . -path ./.git -prune -o -name "buf.gen.yaml" -print0 | while IFS= read -r -d '' buf_yaml; do
  # Run in a subshell so the directory change does not leak into the next
  # iteration; `set -e` is inherited, so a failure still aborts the script.
  (
    cd "$(dirname "${buf_yaml}")"
    buf generate
  )
done
12 changes: 7 additions & 5 deletions grpc/v1/webserver.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion grpc/v1/webserver.proto
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,9 @@ message FileMatch {
// other packages can print some diagnostics.
string debug = 2;

string file_name = 3;
// The repository-relative path to the file.
// 🚨Warning: file_name might not be a valid UTF-8 string.
bytes file_name = 3;

// Repository is the globally unique name of the repo of the
// match
Expand Down

0 comments on commit 749ad46

Please sign in to comment.