Skip to content

Commit

Permalink
testscript: suggest misspelled commands
Browse files Browse the repository at this point in the history
If a command is not found, we go through the list of defined commands
and check if any of them are sufficiently close to the one used.
"Sufficiently close" is defined by having a Damerau-Levenshtein distance
of 1, which feels like it hits the sweet spot between usefulness and
ease of implementation.

The negation case is still special-cased, as negation is not in the set
of defined commands.

Fixes rogpeppe#190
  • Loading branch information
Merovius authored and mvdan committed May 5, 2023
1 parent 44c3b86 commit 5150104
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 1 deletion.
68 changes: 68 additions & 0 deletions internal/misspell/misspell.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Package misspell impements utilities for basic spelling correction.
package misspell

import (
"unicode/utf8"
)

// AlmostEqual reports whether a and b have Damerau-Levenshtein distance of at
// most 1. That is, it reports whether a can be transformed into b by adding,
// removing or substituting a single rune, or by swapping two adjacent runes.
// Invalid runes are considered equal.
//
// It runs in O(len(a)+len(b)) time.
func AlmostEqual(a, b string) bool {
for len(a) > 0 && len(b) > 0 {
ra, tailA := shiftRune(a)
rb, tailB := shiftRune(b)
if ra == rb {
a, b = tailA, tailB
continue
}
// check for addition/deletion/substitution
if equalValid(a, tailB) || equalValid(tailA, b) || equalValid(tailA, tailB) {
return true
}
if len(tailA) == 0 || len(tailB) == 0 {
return false
}
// check for swap
a, b = tailA, tailB
Ra, tailA := shiftRune(tailA)
Rb, tailB := shiftRune(tailB)
return ra == Rb && Ra == rb && equalValid(tailA, tailB)
}
if len(a) == 0 {
return len(b) == 0 || singleRune(b)
}
return singleRune(a)
}

// singleRune reports whether s consists of a single UTF-8 codepoint.
func singleRune(s string) bool {
_, n := utf8.DecodeRuneInString(s)
return n == len(s)
}

// shiftRune splits off the first UTF-8 codepoint from s and returns it and the
// rest of the string. It panics if s is empty.
func shiftRune(s string) (rune, string) {
if len(s) == 0 {
panic(s)
}
r, n := utf8.DecodeRuneInString(s)
return r, s[n:]
}

// equalValid reports whether a and b are equal, if invalid code points are considered equal.
func equalValid(a, b string) bool {
var ra, rb rune
for len(a) > 0 && len(b) > 0 {
ra, a = shiftRune(a)
rb, b = shiftRune(b)
if ra != rb {
return false
}
}
return len(a) == 0 && len(b) == 0
}
100 changes: 100 additions & 0 deletions internal/misspell/misspell_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package misspell

import (
"math"
"testing"
)

func TestAlmostEqual(t *testing.T) {
t.Parallel()

tcs := []struct {
inA string
inB string
want bool
}{
{"", "", true},
{"", "a", true},
{"a", "a", true},
{"a", "b", true},
{"hello", "hell", true},
{"hello", "jello", true},
{"hello", "helol", true},
{"hello", "jelol", false},
}
for _, tc := range tcs {
got := AlmostEqual(tc.inA, tc.inB)
if got != tc.want {
t.Errorf("AlmostEqual(%q, %q) = %v, want %v", tc.inA, tc.inB, got, tc.want)
}
// two tests for the price of one \o/
if got != AlmostEqual(tc.inB, tc.inA) {
t.Errorf("AlmostEqual(%q, %q) == %v != AlmostEqual(%q, %q)", tc.inA, tc.inB, got, tc.inB, tc.inA)
}
}
}

func FuzzAlmostEqual(f *testing.F) {
f.Add("", "")
f.Add("", "a")
f.Add("a", "a")
f.Add("a", "b")
f.Add("hello", "hell")
f.Add("hello", "jello")
f.Add("hello", "helol")
f.Add("hello", "jelol")
f.Fuzz(func(t *testing.T, a, b string) {
if len(a) > 10 || len(b) > 10 {
// longer strings won't add coverage, but take longer to check
return
}
d := editDistance([]rune(a), []rune(b))
got := AlmostEqual(a, b)
if want := d <= 1; got != want {
t.Errorf("AlmostEqual(%q, %q) = %v, editDistance(%q, %q) = %d", a, b, got, a, b, d)
}
if got != AlmostEqual(b, a) {
t.Errorf("AlmostEqual(%q, %q) == %v != AlmostEqual(%q, %q)", a, b, got, b, a)
}
})
}

// editDistance returns the Damerau-Levenshtein distance between a and b. It is
// inefficient, but by keeping almost verbatim to the recursive definition from
// Wikipedia, hopefully "obviously correct" and thus suitable for the fuzzing
// test of AlmostEqual.
func editDistance(a, b []rune) int {
i, j := len(a), len(b)
m := math.MaxInt
if i == 0 && j == 0 {
return 0
}
if i > 0 {
m = min(m, editDistance(a[1:], b)+1)
}
if j > 0 {
m = min(m, editDistance(a, b[1:])+1)
}
if i > 0 && j > 0 {
d := editDistance(a[1:], b[1:])
if a[0] != b[0] {
d += 1
}
m = min(m, d)
}
if i > 1 && j > 1 && a[0] == b[1] && a[1] == b[0] {
d := editDistance(a[2:], b[2:])
if a[0] != b[0] {
d += 1
}
m = min(m, d)
}
return m
}

func min(a, b int) int {
if a < b {
return a
}
return b
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("")
string("00")
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("\x980")
string("0\xb70")
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("OOOOOOOO000")
string("0000000000000")
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("OOOOOOOO000")
string("0000000000\x1000")
17 changes: 17 additions & 0 deletions testscript/testdata/testscript_notfound.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Check that unknown commands output a useful error message

! testscript notfound
stdout 'unknown command "notexist"'

! testscript negation
stdout 'unknown command "!exists" \(did you mean "! exists"\?\)'

! testscript misspelled
stdout 'unknown command "exits" \(did you mean "exists"\?\)'

-- notfound/script.txt --
notexist
-- negation/script.txt --
!exists file
-- misspelled/script.txt --
exits file
49 changes: 48 additions & 1 deletion testscript/testscript.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ import (
"path/filepath"
"regexp"
"runtime"
"sort"
"strings"
"sync/atomic"
"syscall"
"testing"
"time"

"github.com/rogpeppe/go-internal/imports"
"github.com/rogpeppe/go-internal/internal/misspell"
"github.com/rogpeppe/go-internal/internal/os/execpath"
"github.com/rogpeppe/go-internal/par"
"github.com/rogpeppe/go-internal/testenv"
Expand Down Expand Up @@ -669,7 +671,16 @@ func (ts *TestScript) runLine(line string) (runOK bool) {
cmd = ts.params.Cmds[args[0]]
}
if cmd == nil {
ts.Fatalf("unknown command %q", args[0])
// try to find spelling corrections. We arbitrarily limit the number of
// corrections, to not be too noisy.
switch c := ts.cmdSuggestions(args[0]); len(c) {
case 1:
ts.Fatalf("unknown command %q (did you mean %q?)", args[0], c[0])
case 2, 3, 4:
ts.Fatalf("unknown command %q (did you mean one of %q?)", args[0], c)
default:
ts.Fatalf("unknown command %q", args[0])
}
}
ts.callBuiltinCmd(args[0], func() {
cmd(ts, neg, args[1:])
Expand All @@ -694,6 +705,42 @@ func (ts *TestScript) callBuiltinCmd(cmd string, runCmd func()) {
runCmd()
}

func (ts *TestScript) cmdSuggestions(name string) []string {
// special case: spell-correct `!cmd` to `! cmd`
if strings.HasPrefix(name, "!") {
if _, ok := scriptCmds[name[1:]]; ok {
return []string{"! " + name[1:]}
}
if _, ok := ts.params.Cmds[name[1:]]; ok {
return []string{"! " + name[1:]}
}
}
var candidates []string
for c := range scriptCmds {
if misspell.AlmostEqual(name, c) {
candidates = append(candidates, c)
}
}
for c := range ts.params.Cmds {
if misspell.AlmostEqual(name, c) {
candidates = append(candidates, c)
}
}
if len(candidates) == 0 {
return nil
}
// deduplicate candidates
// TODO: Use slices.Compact (and maybe slices.Sort) once we can use Go 1.21
sort.Strings(candidates)
out := candidates[:1]
for _, c := range candidates[1:] {
if out[len(out)-1] == c {
out = append(out, c)
}
}
return out
}

func (ts *TestScript) applyScriptUpdates() {
if len(ts.scriptUpdates) == 0 {
return
Expand Down

0 comments on commit 5150104

Please sign in to comment.