Skip to content

Commit

Permalink
Add ChunkMatch.BestLineMatch to return the best-scoring line
Browse files Browse the repository at this point in the history
  • Loading branch information
jtibshirani committed Jan 7, 2025
1 parent 37c4df8 commit ce59d31
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 104 deletions.
6 changes: 6 additions & 0 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,13 @@ type ChunkMatch struct {
// beginning of a line (Column will always be 1).
ContentStart Location

// Score is the overall relevance score of this chunk.
Score float64

// BestLineMatch is the line number of the highest-scoring line match in this chunk.
// The line number is 1-based and refers to the line's position in the full file. If
// FileName is true, this number will be 0.
BestLineMatch uint32
}

func (cm *ChunkMatch) sizeBytes() (sz uint64) {
Expand Down
30 changes: 16 additions & 14 deletions api_proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,14 @@ func ChunkMatchFromProto(p *proto.ChunkMatch) ChunkMatch {
}

return ChunkMatch{
Content: p.GetContent(),
ContentStart: LocationFromProto(p.GetContentStart()),
FileName: p.GetFileName(),
Ranges: ranges,
SymbolInfo: symbols,
Score: p.GetScore(),
DebugScore: p.GetDebugScore(),
Content: p.GetContent(),
ContentStart: LocationFromProto(p.GetContentStart()),
FileName: p.GetFileName(),
Ranges: ranges,
SymbolInfo: symbols,
Score: p.GetScore(),
BestLineMatch: p.GetBestLineMatch(),
DebugScore: p.GetDebugScore(),
}
}

Expand All @@ -118,13 +119,14 @@ func (cm *ChunkMatch) ToProto() *proto.ChunkMatch {
}

return &proto.ChunkMatch{
Content: cm.Content,
ContentStart: cm.ContentStart.ToProto(),
FileName: cm.FileName,
Ranges: ranges,
SymbolInfo: symbolInfo,
Score: cm.Score,
DebugScore: cm.DebugScore,
Content: cm.Content,
ContentStart: cm.ContentStart.ToProto(),
FileName: cm.FileName,
Ranges: ranges,
SymbolInfo: symbolInfo,
Score: cm.Score,
BestLineMatch: cm.BestLineMatch,
DebugScore: cm.DebugScore,
}
}

Expand Down
2 changes: 1 addition & 1 deletion api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func TestMatchSize(t *testing.T) {
size: 256,
}, {
v: ChunkMatch{},
size: 112,
size: 120,
}, {
v: candidateMatch{},
size: 80,
Expand Down
53 changes: 48 additions & 5 deletions build/scoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ import (
)

type scoreCase struct {
fileName string
content []byte
query query.Q
language string
wantScore float64
fileName string
content []byte
query query.Q
language string
wantScore float64
wantBestLineMatch uint32
}

func TestFileNameMatch(t *testing.T) {
Expand Down Expand Up @@ -79,6 +80,8 @@ func TestBM25(t *testing.T) {
language: "Java",
// bm25-score: 0.57 <- sum-termFrequencyScore: 10.00, length-ratio: 1.00
wantScore: 0.57,
// line 5: private final int exampleField;
wantBestLineMatch: 5,
}, {
// Matches only on content
fileName: "example.java",
Expand All @@ -91,6 +94,8 @@ func TestBM25(t *testing.T) {
language: "Java",
// bm25-score: 1.75 <- sum-termFrequencyScore: 56.00, length-ratio: 1.00
wantScore: 1.75,
// line 3: public class InnerClasses {
wantBestLineMatch: 3,
},
{
// Matches only on filename
Expand Down Expand Up @@ -130,6 +135,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word)
wantScore: 6550,
// line 37: public class InnerClass implements InnerInterface<Integer, Integer> {
wantBestLineMatch: 37,
},
{
fileName: "example.java",
Expand All @@ -138,6 +145,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word)
wantScore: 7000,
// line 32: public static class InnerStaticClass {
wantBestLineMatch: 32,
},
{
fileName: "example.java",
Expand All @@ -146,6 +155,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 900 (Java enum) + 500 (word)
wantScore: 8400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -154,6 +165,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 800 (Java interface) + 500 (word)
wantScore: 8300,
// line 22: public interface InnerInterface<A, B> {
wantBestLineMatch: 22,
},
{
fileName: "example.java",
Expand All @@ -162,6 +175,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 700 (Java method) + 500 (word)
wantScore: 8200,
// line 44: public void innerMethod() {
wantBestLineMatch: 44,
},
{
fileName: "example.java",
Expand All @@ -170,6 +185,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 600 (Java field) + 500 (word)
wantScore: 8100,
// line 38: private final int field;
wantBestLineMatch: 38,
},
{
fileName: "example.java",
Expand All @@ -178,6 +195,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 500 (Java enum constant) + 500 (word)
wantScore: 8000,
// line 18: B,
wantBestLineMatch: 18,
},
// 2 Atoms (1x content and 1x filename)
{
Expand All @@ -187,6 +206,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom)
wantScore: 6800,
// line 5: private final int exampleField;
wantBestLineMatch: 5,
},
// 3 Atoms (2x content, 1x filename)
{
Expand All @@ -199,6 +220,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom)
wantScore: 8466,
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
wantBestLineMatch: 54,
},
// 4 Atoms (4x content)
{
Expand All @@ -213,6 +236,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom)
wantScore: 8700,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -221,6 +246,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word)
wantScore: 4750,
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
wantBestLineMatch: 54,
},
{
fileName: "example.java",
Expand All @@ -229,6 +256,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (Symbol) + 900 (Java enum) + 500 (word)
wantScore: 8400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -237,6 +266,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (edge Symbol) + 900 (Java enum) + 500 (word)
wantScore: 6900,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -245,6 +276,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 4000 (overlap Symbol) + 900 (Java enum) + 500 (word)
wantScore: 5400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
}

Expand Down Expand Up @@ -640,6 +673,16 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag
t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].ChunkMatches[0].DebugScore)
}

if c.wantBestLineMatch != 0 {
if len(srs.Files[0].ChunkMatches) == 0 {
t.Fatalf("want BestLineMatch %d, but no chunk matches were returned", c.wantBestLineMatch)
}
chunkMatch := srs.Files[0].ChunkMatches[0]
if chunkMatch.BestLineMatch != c.wantBestLineMatch {
t.Fatalf("want BestLineMatch %d, got %d", c.wantBestLineMatch, chunkMatch.BestLineMatch)
}
}

if got := srs.Files[0].Language; got != c.language {
t.Fatalf("want %s, got %s", c.language, got)
}
Expand Down
Loading

0 comments on commit ce59d31

Please sign in to comment.