Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scoring: use repo freshness as tiebreaker #832

Merged
merged 5 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,16 @@ func (r *Repository) UnmarshalJSON(data []byte) error {
r.ID = uint32(id)
}

if v, ok := repo.RawConfig["priority"]; ok {
// Sourcegraph indexserver doesn't set repo.Rank, so we set it here. Setting it
// on read instead of during indexing allows us to avoid a complete reindex.
//
// Prefer "latest_commit_date" over "priority" for ranking. We keep priority for
// backwards compatibility.
if _, ok := repo.RawConfig["latest_commit_date"]; ok {
// We use the number of months since 1970 as a simple measure of repo freshness.
// It is monotonically increasing and stable across re-indexes and restarts.
r.Rank = monthsSince1970(repo.LatestCommitDate)
Comment on lines +644 to +646
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As an alternative, why not use the unix timestamp, which is also monotonically increasing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is basically the unix timestamp, but in buckets of 1 month to fit into the uint16 we have. I think monthly increments make sense and the uint16 keeps the number of digits we have to reserve in the score to 5.

} else if v, ok := repo.RawConfig["priority"]; ok {
r.priority, err = strconv.ParseFloat(v, 64)
if err != nil {
r.priority = 0
Expand All @@ -645,14 +654,28 @@ func (r *Repository) UnmarshalJSON(data []byte) error {
// based on priority. Setting it on read instead of during indexing
// allows us to avoid a complete reindex.
if r.Rank == 0 && r.priority > 0 {
// Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular
// repos (roughly ones with over 5,000 stars) see diminishing returns from more stars.
// Normalize the repo score within [0, maxUint16), with the midpoint at 5,000.
// This means popular repos (roughly ones with over 5,000 stars) see diminishing
// returns from more stars.
r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16)
}
}

return nil
}

// monthsSince1970 returns the number of calendar months between January 1970
// and t, with t interpreted in UTC. The result is clamped to the range
// [0, maxUInt16]: the upper bound is reached in April 7431, the lower bound
// applies to every instant before 1970-01-01T00:00:00Z.
//
// Year and month are read in UTC rather than in t's own location so that the
// result depends only on the instant t represents. Reading them in the local
// zone would let an instant just after the Unix epoch, carried in a zone west
// of UTC, report the local year 1969; that produces a negative month count,
// which wraps around to a very large value when converted to uint16.
func monthsSince1970(t time.Time) uint16 {
	utc := t.UTC()
	months := (utc.Year()-1970)*12 + int(utc.Month()-time.January)
	// Clamp while still in int space: converting an out-of-range int to uint16
	// silently wraps instead of saturating.
	return uint16(max(0, min(months, maxUInt16)))
}

// MergeMutable will merge x into r. mutated will be true if it made any
// changes. err is non-nil if we needed to mutate an immutable field.
//
Expand Down
26 changes: 26 additions & 0 deletions api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,3 +368,29 @@ func TestRepositoryMergeMutable(t *testing.T) {
}
})
}

// TestMonthsSince1970 checks monthsSince1970 at both clamping bounds (dates
// before 1970 and dates at or beyond April 7431) and for a handful of
// representative dates in between.
func TestMonthsSince1970(t *testing.T) {
	tests := []struct {
		name     string
		input    time.Time
		expected uint16
	}{
		{"Before 1970", time.Date(1950, 12, 31, 0, 0, 0, 0, time.UTC), 0},
		{"Unix 0", time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC), 0},
		{"Feb 1970", time.Date(1970, 2, 1, 0, 0, 0, 0, time.UTC), 1},
		{"Year 1989", time.Date(1989, 12, 13, 0, 0, 0, 0, time.UTC), 239},
		{"Sep 2024", time.Date(2024, 9, 20, 0, 0, 0, 0, time.UTC), 656},
		{"Oct 2024", time.Date(2024, 10, 20, 0, 0, 0, 0, time.UTC), 657},
		{"Apr 7431", time.Date(7431, 4, 1, 0, 0, 0, 0, time.UTC), 65535},
		// Use a real calendar date: month 0 / day 0 would be normalized by
		// time.Date to 9998-11-30 and so would not test the year named here.
		{"9999", time.Date(9999, 1, 1, 0, 0, 0, 0, time.UTC), 65535},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := monthsSince1970(tt.input); got != tt.expected {
				t.Errorf("monthsSince1970(%s) = %d, want %d",
					tt.input.Format(time.RFC3339), got, tt.expected)
			}
		})
	}
}
Loading
Loading