diff --git a/adapters.go b/adapters.go index c7d02e1..3668cc0 100644 --- a/adapters.go +++ b/adapters.go @@ -18,18 +18,17 @@ func isDir(path string, d fs.DirEntry) bool { return false } -// IgnoreDuplicateDirs wraps fs.WalkDirFunc walkFn to make it follow symbolic +// IgnoreDuplicateDirs wraps [fs.WalkDirFunc] walkFn to make it follow symbolic // links and ignore duplicate directories (if a symlink points to a directory // that has already been traversed it is skipped). The walkFn is called for // for skipped directories, but the directory is not traversed (this is // required for error handling). // -// The Config.Follow setting has no effect on the behavior of Walk when +// The Follow [Config] setting has no effect on the behavior of Walk when // this wrapper is used. // -// In most use cases, the returned fs.WalkDirFunc should not be reused between -// in another call to Walk. If it is reused, any previously visited file will -// be skipped. +// In most use cases, the returned [fs.WalkDirFunc] should not be reused. +// If it is reused, any previously visited file will be skipped. // // NOTE: The order of traversal is undefined. Given an "example" directory // like the one below where "dir" is a directory and "smydir1" and "smydir2" @@ -68,9 +67,8 @@ func IgnoreDuplicateDirs(walkFn fs.WalkDirFunc) fs.WalkDirFunc { // files are ignored. If a symlink resolves to a file that has already been // visited it will be skipped. // -// In most use cases, the returned fs.WalkDirFunc should not be reused between -// in another call to Walk. If it is reused, any previously visited file will -// be skipped. +// In most use cases, the returned [fs.WalkDirFunc] should not be reused. +// If it is reused, any previously visited file will be skipped. // // This can significantly slow Walk as os.Stat() is called for each path // (on Windows, os.Stat() is only needed for symlinks). 
@@ -92,8 +90,8 @@ func IgnoreDuplicateFiles(walkFn fs.WalkDirFunc) fs.WalkDirFunc { } } -// IgnorePermissionErrors wraps walkFn so that permission errors are ignored. -// The returned fs.WalkDirFunc may be reused. +// IgnorePermissionErrors wraps walkFn so that [fs.ErrPermission] permission +// errors are ignored. The returned [fs.WalkDirFunc] may be reused. func IgnorePermissionErrors(walkFn fs.WalkDirFunc) fs.WalkDirFunc { return func(path string, d fs.DirEntry, err error) error { if err != nil && os.IsPermission(err) { diff --git a/dirent.go b/dirent.go index 92aa31e..6ae2c09 100644 --- a/dirent.go +++ b/dirent.go @@ -31,11 +31,16 @@ func loadFileInfo(pinfo **fileInfo) *fileInfo { return fi } -// StatDirEntry returns the fs.FileInfo for the file or subdirectory described -// by the entry. If the entry is a symbolic link, StatDirEntry returns the -// fs.FileInfo for the file the line references (os.Stat). -// If fs.DirEntry de is a fastwalk.DirEntry it's Stat() method is used and the -// returned fs.FileInfo may be a previously cached result. +// StatDirEntry returns a [fs.FileInfo] describing the named file ([os.Stat]). +// If de is a [fastwalk.DirEntry] its Stat method is used and the returned +// FileInfo may be cached from a prior call to Stat. If a cached result is not +// desired, users should just call [os.Stat] directly. +// +// This is a helper function for calling Stat on the DirEntry passed to the +// walkFn argument to [Walk]. +// +// The path argument is only used if de is not of type [fastwalk.DirEntry]. +// Therefore, de should be the DirEntry describing path. 
func StatDirEntry(path string, de fs.DirEntry) (fs.FileInfo, error) { if de == nil { return nil, &os.PathError{Op: "stat", Path: path, Err: syscall.EINVAL} diff --git a/dirent_portable.go b/dirent_portable.go index f35a2c8..aba3164 100644 --- a/dirent_portable.go +++ b/dirent_portable.go @@ -1,37 +1,123 @@ //go:build !darwin && !(aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris) +// TODO: add a "portable_dirent" build tag so that we can test this +// on non-Windows platforms + package fastwalk import ( "io/fs" "os" + "slices" + "strings" + "sync" ) +var _ DirEntry = (*portableDirent)(nil) + type portableDirent struct { fs.DirEntry - path string - stat *fileInfo + parent string + stat *fileInfo +} + +func (d *portableDirent) String() string { + return fs.FormatDirEntry(d) } -// TODO: cache the result of Stat func (d *portableDirent) Stat() (fs.FileInfo, error) { if d.DirEntry.Type()&os.ModeSymlink == 0 { return d.DirEntry.Info() } stat := loadFileInfo(&d.stat) stat.once.Do(func() { - stat.FileInfo, stat.err = os.Stat(d.path) + stat.FileInfo, stat.err = os.Stat(d.parent + string(os.PathSeparator) + d.Name()) }) return stat.FileInfo, stat.err } -func newDirEntry(dirName string, info fs.DirEntry) fs.DirEntry { +func newDirEntry(dirName string, info fs.DirEntry) DirEntry { return &portableDirent{ DirEntry: info, - path: dirName + string(os.PathSeparator) + info.Name(), + parent: dirName, } } -func fileInfoToDirEntry(dirname string, fi fs.FileInfo) fs.DirEntry { +func fileInfoToDirEntry(dirname string, fi fs.FileInfo) DirEntry { return newDirEntry(dirname, fs.FileInfoToDirEntry(fi)) } + +var direntSlicePool = sync.Pool{ + New: func() any { + a := make([]DirEntry, 0, 32) + return &a + }, +} + +func putDirentSlice(p *[]DirEntry) { + // max is half as many as Unix because twice the size + if p != nil && cap(*p) <= 16*1024 { + a := *p + for i := range a { + a[i] = nil + } + *p = a[:0] + direntSlicePool.Put(p) + } +} + +func 
sortDirents(mode SortMode, dents []DirEntry) { + if len(dents) <= 1 { + return + } + switch mode { + case SortLexical: + slices.SortFunc(dents, func(d1, d2 DirEntry) int { + return strings.Compare(d1.Name(), d2.Name()) + }) + case SortFilesFirst: + slices.SortFunc(dents, func(d1, d2 DirEntry) int { + r1 := d1.Type().IsRegular() + r2 := d2.Type().IsRegular() + switch { + case r1 && !r2: + return -1 + case !r1 && r2: + return 1 + case !r1 && !r2: + // Both are not regular files: sort directories last + dd1 := d1.Type().IsDir() + dd2 := d2.Type().IsDir() + switch { + case !dd1 && dd2: + return -1 + case dd1 && !dd2: + return 1 + } + } + return strings.Compare(d1.Name(), d2.Name()) + }) + case SortDirsFirst: + slices.SortFunc(dents, func(d1, d2 DirEntry) int { + dd1 := d1.Type().IsDir() + dd2 := d2.Type().IsDir() + switch { + case dd1 && !dd2: + return -1 + case !dd1 && dd2: + return 1 + case !dd1 && !dd2: + // Both are not directories: sort regular files first + r1 := d1.Type().IsRegular() + r2 := d2.Type().IsRegular() + switch { + case r1 && !r2: + return -1 + case !r1 && r2: + return 1 + } + } + return strings.Compare(d1.Name(), d2.Name()) + }) + } +} diff --git a/dirent_portable_test.go b/dirent_portable_test.go new file mode 100644 index 0000000..3018700 --- /dev/null +++ b/dirent_portable_test.go @@ -0,0 +1,131 @@ +//go:build !darwin && !(aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris) + +package fastwalk + +import ( + "io/fs" + "math/rand" + "reflect" + "testing" + "time" +) + +var _ DirEntry = dirEntry{} + +// Minimal DirEntry for testing +type dirEntry struct { + name string + typ fs.FileMode +} + +func (de dirEntry) Name() string { return de.name } +func (de dirEntry) IsDir() bool { return de.typ.IsDir() } +func (de dirEntry) Type() fs.FileMode { return de.typ.Type() } +func (de dirEntry) Info() (fs.FileInfo, error) { panic("not implemented") } +func (de dirEntry) Stat() (fs.FileInfo, error) { panic("not implemented") } + 
+func (de dirEntry) String() string { + return fs.FormatDirEntry(de) +} + +// NB: this must be kept in sync with the +// TestSortDirents in dirent_unix_test.go +func TestSortDirents(t *testing.T) { + direntNames := func(dents []DirEntry) []string { + names := make([]string, len(dents)) + for i, d := range dents { + names[i] = d.Name() + } + return names + } + + t.Run("None", func(t *testing.T) { + dents := []DirEntry{ + dirEntry{name: "b"}, + dirEntry{name: "a"}, + dirEntry{name: "d"}, + dirEntry{name: "c"}, + } + want := direntNames(dents) + sortDirents(SortNone, dents) + got := direntNames(dents) + if !reflect.DeepEqual(got, want) { + t.Errorf("got: %q want: %q", got, want) + } + }) + + rr := rand.New(rand.NewSource(time.Now().UnixNano())) + shuffleDirents := func(dents []DirEntry) []DirEntry { + rr.Shuffle(len(dents), func(i, j int) { + dents[i], dents[j] = dents[j], dents[i] + }) + return dents + } + + // dents needs to be in the expected order + test := func(t *testing.T, dents []DirEntry, mode SortMode) { + want := direntNames(dents) + // Run multiple times with different shuffles + for i := 0; i < 10; i++ { + t.Run("", func(t *testing.T) { + sortDirents(mode, shuffleDirents(dents)) + got := direntNames(dents) + if !reflect.DeepEqual(got, want) { + t.Errorf("got: %q want: %q", got, want) + } + }) + } + } + + t.Run("Lexical", func(t *testing.T) { + dents := []DirEntry{ + dirEntry{name: "a"}, + dirEntry{name: "b"}, + dirEntry{name: "c"}, + dirEntry{name: "d"}, + } + test(t, dents, SortLexical) + }) + + t.Run("FilesFirst", func(t *testing.T) { + dents := []DirEntry{ + // Files lexically + dirEntry{name: "f1", typ: 0}, + dirEntry{name: "f2", typ: 0}, + dirEntry{name: "f3", typ: 0}, + // Non-dirs lexically + dirEntry{name: "a1", typ: fs.ModeSymlink}, + dirEntry{name: "a2", typ: fs.ModeSymlink}, + dirEntry{name: "a3", typ: fs.ModeSymlink}, + dirEntry{name: "s1", typ: fs.ModeSocket}, + dirEntry{name: "s2", typ: fs.ModeSocket}, + dirEntry{name: "s3", typ: 
fs.ModeSocket}, + // Dirs lexically + dirEntry{name: "d1", typ: fs.ModeDir}, + dirEntry{name: "d2", typ: fs.ModeDir}, + dirEntry{name: "d3", typ: fs.ModeDir}, + } + test(t, dents, SortFilesFirst) + }) + + t.Run("DirsFirst", func(t *testing.T) { + dents := []DirEntry{ + // Dirs lexically + dirEntry{name: "d1", typ: fs.ModeDir}, + dirEntry{name: "d2", typ: fs.ModeDir}, + dirEntry{name: "d3", typ: fs.ModeDir}, + // Files lexically + dirEntry{name: "f1", typ: 0}, + dirEntry{name: "f2", typ: 0}, + dirEntry{name: "f3", typ: 0}, + // Non-dirs lexically + dirEntry{name: "a1", typ: fs.ModeSymlink}, + dirEntry{name: "a2", typ: fs.ModeSymlink}, + dirEntry{name: "a3", typ: fs.ModeSymlink}, + dirEntry{name: "s1", typ: fs.ModeSocket}, + dirEntry{name: "s2", typ: fs.ModeSocket}, + dirEntry{name: "s3", typ: fs.ModeSocket}, + } + test(t, dents, SortDirsFirst) + }) +} diff --git a/dirent_unix.go b/dirent_unix.go index 13e734e..2d59ff6 100644 --- a/dirent_unix.go +++ b/dirent_unix.go @@ -5,19 +5,23 @@ package fastwalk import ( "io/fs" "os" + "slices" + "strings" + "sync" ) type unixDirent struct { parent string name string - typ os.FileMode + typ fs.FileMode info *fileInfo stat *fileInfo } func (d *unixDirent) Name() string { return d.name } func (d *unixDirent) IsDir() bool { return d.typ.IsDir() } -func (d *unixDirent) Type() os.FileMode { return d.typ } +func (d *unixDirent) Type() fs.FileMode { return d.typ } +func (d *unixDirent) String() string { return fs.FormatDirEntry(d) } func (d *unixDirent) Info() (fs.FileInfo, error) { info := loadFileInfo(&d.info) @@ -38,7 +42,7 @@ func (d *unixDirent) Stat() (fs.FileInfo, error) { return stat.FileInfo, stat.err } -func newUnixDirent(parent, name string, typ os.FileMode) *unixDirent { +func newUnixDirent(parent, name string, typ fs.FileMode) *unixDirent { return &unixDirent{ parent: parent, name: name, @@ -46,7 +50,7 @@ func newUnixDirent(parent, name string, typ os.FileMode) *unixDirent { } } -func fileInfoToDirEntry(dirname string, fi 
fs.FileInfo) fs.DirEntry { +func fileInfoToDirEntry(dirname string, fi fs.FileInfo) DirEntry { info := &fileInfo{ FileInfo: fi, } @@ -58,3 +62,77 @@ func fileInfoToDirEntry(dirname string, fi fs.FileInfo) fs.DirEntry { info: info, } } + +var direntSlicePool = sync.Pool{ + New: func() any { + a := make([]*unixDirent, 0, 32) + return &a + }, +} + +func putDirentSlice(p *[]*unixDirent) { + if p != nil && cap(*p) <= 32*1024 /* 256Kb */ { + a := *p + for i := range a { + a[i] = nil + } + *p = a[:0] + direntSlicePool.Put(p) + } +} + +func sortDirents(mode SortMode, dents []*unixDirent) { + if len(dents) <= 1 { + return + } + switch mode { + case SortLexical: + slices.SortFunc(dents, func(d1, d2 *unixDirent) int { + return strings.Compare(d1.name, d2.name) + }) + case SortFilesFirst: + slices.SortFunc(dents, func(d1, d2 *unixDirent) int { + r1 := d1.typ.IsRegular() + r2 := d2.typ.IsRegular() + switch { + case r1 && !r2: + return -1 + case !r1 && r2: + return 1 + case !r1 && !r2: + // Both are not regular files: sort directories last + dd1 := d1.typ.IsDir() + dd2 := d2.typ.IsDir() + switch { + case !dd1 && dd2: + return -1 + case dd1 && !dd2: + return 1 + } + } + return strings.Compare(d1.name, d2.name) + }) + case SortDirsFirst: + slices.SortFunc(dents, func(d1, d2 *unixDirent) int { + dd1 := d1.typ.IsDir() + dd2 := d2.typ.IsDir() + switch { + case dd1 && !dd2: + return -1 + case !dd1 && dd2: + return 1 + case !dd1 && !dd2: + // Both are not directories: sort regular files first + r1 := d1.typ.IsRegular() + r2 := d2.typ.IsRegular() + switch { + case r1 && !r2: + return -1 + case !r1 && r2: + return 1 + } + } + return strings.Compare(d1.name, d2.name) + }) + } +} diff --git a/dirent_unix_test.go b/dirent_unix_test.go index e2dbe42..bac8f03 100644 --- a/dirent_unix_test.go +++ b/dirent_unix_test.go @@ -1,15 +1,18 @@ -//go:build aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris +//go:build darwin || aix || dragonfly || freebsd || (js && 
wasm) || linux || netbsd || openbsd || solaris package fastwalk import ( "io/fs" + "math/rand" "os" "path/filepath" + "reflect" "runtime" "sync" "sync/atomic" "testing" + "time" "unsafe" ) @@ -117,6 +120,108 @@ func TestUnixDirent(t *testing.T) { }) } +// NB: this must be kept in sync with the +// TestSortDirents in dirent_portable_test.go +func TestSortDirents(t *testing.T) { + direntNames := func(dents []*unixDirent) []string { + names := make([]string, len(dents)) + for i, d := range dents { + names[i] = d.Name() + } + return names + } + + t.Run("None", func(t *testing.T) { + dents := []*unixDirent{ + {name: "b"}, + {name: "a"}, + {name: "d"}, + {name: "c"}, + } + want := direntNames(dents) + sortDirents(SortNone, dents) + got := direntNames(dents) + if !reflect.DeepEqual(got, want) { + t.Errorf("got: %q want: %q", got, want) + } + }) + + rr := rand.New(rand.NewSource(time.Now().UnixNano())) + shuffleDirents := func(dents []*unixDirent) []*unixDirent { + rr.Shuffle(len(dents), func(i, j int) { + dents[i], dents[j] = dents[j], dents[i] + }) + return dents + } + + // dents needs to be in the expected order + test := func(t *testing.T, dents []*unixDirent, mode SortMode) { + want := direntNames(dents) + // Run multiple times with different shuffles + for i := 0; i < 10; i++ { + t.Run("", func(t *testing.T) { + sortDirents(mode, shuffleDirents(dents)) + got := direntNames(dents) + if !reflect.DeepEqual(got, want) { + t.Errorf("got: %q want: %q", got, want) + } + }) + } + } + + t.Run("Lexical", func(t *testing.T) { + dents := []*unixDirent{ + {name: "a"}, + {name: "b"}, + {name: "c"}, + {name: "d"}, + } + test(t, dents, SortLexical) + }) + + t.Run("FilesFirst", func(t *testing.T) { + dents := []*unixDirent{ + // Files lexically + {name: "f1", typ: 0}, + {name: "f2", typ: 0}, + {name: "f3", typ: 0}, + // Non-dirs lexically + {name: "a1", typ: fs.ModeSymlink}, + {name: "a2", typ: fs.ModeSymlink}, + {name: "a3", typ: fs.ModeSymlink}, + {name: "s1", typ: fs.ModeSocket}, 
+ {name: "s2", typ: fs.ModeSocket}, + {name: "s3", typ: fs.ModeSocket}, + // Dirs lexically + {name: "d1", typ: fs.ModeDir}, + {name: "d2", typ: fs.ModeDir}, + {name: "d3", typ: fs.ModeDir}, + } + test(t, dents, SortFilesFirst) + }) + + t.Run("DirsFirst", func(t *testing.T) { + dents := []*unixDirent{ + // Dirs lexically + {name: "d1", typ: fs.ModeDir}, + {name: "d2", typ: fs.ModeDir}, + {name: "d3", typ: fs.ModeDir}, + // Files lexically + {name: "f1", typ: 0}, + {name: "f2", typ: 0}, + {name: "f3", typ: 0}, + // Non-dirs lexically + {name: "a1", typ: fs.ModeSymlink}, + {name: "a2", typ: fs.ModeSymlink}, + {name: "a3", typ: fs.ModeSymlink}, + {name: "s1", typ: fs.ModeSocket}, + {name: "s2", typ: fs.ModeSocket}, + {name: "s3", typ: fs.ModeSocket}, + } + test(t, dents, SortDirsFirst) + }) +} + func BenchmarkUnixDirentLoadFileInfo(b *testing.B) { wd, err := os.Getwd() if err != nil { diff --git a/entry_filter_unix.go b/entry_filter_unix.go index fa0fba8..eaba400 100644 --- a/entry_filter_unix.go +++ b/entry_filter_unix.go @@ -48,7 +48,7 @@ func (e *EntryFilter) seen(dev, ino uint64) (seen bool) { // TODO: this name is confusing and should be fixed -// Entry returns if path and fs.DirEntry have been seen before. +// Entry returns if path and [fs.DirEntry] have been seen before. func (e *EntryFilter) Entry(path string, de fs.DirEntry) (seen bool) { fi, err := StatDirEntry(path, de) if err != nil { diff --git a/fastwalk.go b/fastwalk.go index 2f3bcd0..6cdc58d 100644 --- a/fastwalk.go +++ b/fastwalk.go @@ -1,5 +1,5 @@ -// Package fastwalk provides a faster version of filepath.Walk for file system -// scanning tools. +// Package fastwalk provides a faster version of [filepath.WalkDir] for file +// system scanning tools. package fastwalk /* @@ -45,8 +45,9 @@ import ( "sync" ) -// ErrTraverseLink is used as a return value from WalkFuncs to indicate that the -// symlink named in the call may be traversed. 
+// ErrTraverseLink is used as a return value from WalkDirFuncs to indicate that +// the symlink named in the call may be traversed. This error is ignored if +// the Follow [Config] option is true. var ErrTraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory") // ErrSkipFiles is a used as a return value from WalkFuncs to indicate that the @@ -59,8 +60,10 @@ var ErrSkipFiles = errors.New("fastwalk: skip remaining files in directory") // as an error by any function. var SkipDir = fs.SkipDir +// TODO: add fs.SkipAll + // DefaultNumWorkers returns the default number of worker goroutines to use in -// [fastwalk.Walk] and is the value of [runtime.GOMAXPROCS](-1) clamped to a range +// [Walk] and is the value of [runtime.GOMAXPROCS](-1) clamped to a range // of 4 to 32 except on Darwin where it is either 4 (8 cores or less) or 6 // (more than 8 cores). This is because Walk / IO performance on Darwin // degrades with more concurrency. @@ -143,13 +146,108 @@ func DefaultToSlash() bool { return ok } -// DefaultConfig is the default Config used when none is supplied. +// SortMode determines the order that a directory's entries are visited by +// [Walk]. Sorting applies only at the directory level and since we process +// directories in parallel the order in which all files are visited is still +// non-deterministic. +// +// Sorting is mostly useful for programs that print the output of Walk since +// it makes it slightly more ordered compared to the default directory order. +// Sorting may also help some programs that wish to change the order in which +// a directory is processed by either processing all files first or enqueuing +// all directories before processing files. +// +// All lexical sorting is case-sensitive. +// +// The overhead of sorting is minimal compared to the syscalls needed to +// walk directories. 
The impact on performance due to changing the order +// in which directory entries are processed will be dependent on the workload +// and the structure of the file tree being visited (it might also have no +// impact). +type SortMode uint32 + +const ( + // Perform no sorting. Files will be visited in directory order. + // This is the default. + SortNone SortMode = iota + + // Directory entries are sorted by name before being visited. + SortLexical + + // Sort the directory entries so that regular files and non-directories + // (e.g. symbolic links) are visited before directories. Within each + // group (regular files, other files, directories) the entries are sorted + // by name. + // + // This is likely the mode that programs that print the output of Walk + // want to use, since processing all files before enqueuing + // sub-directories makes the output slightly more grouped. + // + // Example order: + // - file: "a.txt" + // - file: "b.txt" + // - link: "a.link" + // - link: "b.link" + // - dir: "d1/" + // - dir: "d2/" + // + SortFilesFirst + + // Sort the directory entries so that directories are visited first, then + // regular files are visited, and finally everything else is visited + // (e.g. symbolic links). Within each group (directories, regular files, + // other files) the entries are sorted by name. + // + // This mode might be useful for preventing other walk goroutines from + // stalling due to lack of work since it immediately enqueues all of a + // directory's sub-directories for processing. The impact on performance + // will be dependent on the workload and the structure of the file tree + // being visited - it might also have no (or even a negative) impact on + // performance so testing/benchmarking is recommended. 
+ // + // An example workload that might cause this is: processing one directory + // takes a long time, that directory has sub-directories we want to walk, + // while processing that directory all other Walk goroutines have finished + // processing their directories, those goroutines are now stalled waiting + // for more work (waiting on the one running goroutine to enqueue its + // sub-directories for processing). + // + // This might also be beneficial if processing files is expensive. + // + // Example order: + // - dir: "d1/" + // - dir: "d2/" + // - file: "a.txt" + // - file: "b.txt" + // - link: "a.link" + // - link: "b.link" + // + SortDirsFirst +) + +var sortModeStrs = [...]string{ + SortNone: "None", + SortLexical: "Lexical", + SortDirsFirst: "DirsFirst", + SortFilesFirst: "FilesFirst", +} + +func (s SortMode) String() string { + if 0 <= int(s) && int(s) < len(sortModeStrs) { + return sortModeStrs[s] + } + return "SortMode(" + itoa(uint64(s)) + ")" +} + +// DefaultConfig is the default [Config] used when none is supplied. var DefaultConfig = Config{ Follow: false, ToSlash: DefaultToSlash(), NumWorkers: DefaultNumWorkers(), + Sort: SortNone, } +// A Config controls the behavior of [Walk]. type Config struct { // TODO: do we want to pass a sentinel error to WalkFunc if // a symlink loop is detected? @@ -178,56 +276,105 @@ type Config struct { // See FZF issue: https://github.com/junegunn/fzf/issues/3859 ToSlash bool + // Sort a directory's entries by SortMode before visiting them. + // The order that files are visited is deterministic only at the directory + // level, but not generally deterministic because we process directories + // in parallel. The performance impact of sorting entries is generally + // negligible compared to the syscalls required to read directories. 
+ // + // This option mostly exists for programs that print the output of Walk + // (like FZF) since it provides some order and thus makes the output much + // nicer compared to the default directory order, which is basically random. + Sort SortMode + // Number of parallel workers to use. If NumWorkers if ≤ 0 then - // [DefaultNumWorkers] is used. + // DefaultNumWorkers is used. NumWorkers int } -// A DirEntry extends the fs.DirEntry interface to add a Stat() method -// that returns the result of calling os.Stat() on the underlying file. +// Copy returns a copy of c. If c is nil an empty [Config] is returned. +func (c *Config) Copy() *Config { + dupe := new(Config) + if c != nil { + *dupe = *c + } + return dupe +} + +// A DirEntry extends the [fs.DirEntry] interface to add a Stat() method +// that returns the result of calling [os.Stat] on the underlying file. // The results of Info() and Stat() are cached. // -// The fs.DirEntry argument passed to the fs.WalkDirFunc by Walk is -// always a DirEntry. The only exception is the root directory with -// with Walk is called. +// The [fs.DirEntry] argument passed to the [fs.WalkDirFunc] by [Walk] is +// always a DirEntry. type DirEntry interface { fs.DirEntry - // Stat returns the FileInfo for the file or subdirectory described + // Stat returns the fs.FileInfo for the file or subdirectory described // by the entry. The returned FileInfo may be from the time of the - // original directory read or from the time of the call to Stat. + // original directory read or from the time of the call to os.Stat. // If the entry denotes a symbolic link, Stat reports the information // about the target itself, not the link. Stat() (fs.FileInfo, error) } -// Walk is a faster implementation of filepath.Walk. +// Walk is a faster implementation of [filepath.WalkDir] that walks the file +// tree rooted at root in parallel, calling walkFn for each file or directory +// in the tree, including root. 
// -// filepath.Walk's design necessarily calls os.Lstat on each file, even if -// the caller needs less info. Many tools need only the type of each file. -// On some platforms, this information is provided directly by the readdir -// system call, avoiding the need to stat each file individually. -// fastwalk_unix.go contains a fork of the syscall routines. +// All errors that arise visiting files and directories are filtered by walkFn; +// see the [fs.WalkDirFunc] documentation for details. +// The [IgnorePermissionErrors] adapter is provided to handle the common case of +// ignoring [fs.ErrPermission] errors. // -// See golang.org/issue/16399 +// By default files are walked in directory order, which makes the output +// non-deterministic. The Sort [Config] option can be used to control the order +// in which directory entries are visited, but since we walk the file tree in +// parallel the output is still non-deterministic (it's just slightly more +// sorted). // -// Walk walks the file tree rooted at root, calling walkFn for each file or -// directory in the tree, including root. +// When a symbolic link is encountered, by default Walk will not follow it +// unless walkFn returns [ErrTraverseLink] or the Follow [Config] setting is +// true. See below for a more detailed explanation. // -// If walkFn returns filepath.SkipDir, the directory is skipped. +// Walk calls walkFn with paths that use the separator character appropriate +// for the operating system unless the ToSlash [Config] setting is true which +// will cause all paths to be joined with a forward slash. +// +// If walkFn returns the [SkipDir] sentinel error, the directory is skipped. +// If walkFn returns the [ErrSkipFiles] sentinel error, the callback will not +// be called for any other files in the current directory. 
+// +// Unlike [filepath.WalkDir]: // -// Unlike filepath.WalkDir: -// - File stat calls must be done by the user and should be done via -// the DirEntry argument to walkFn since it caches the results of -// Stat and Lstat. -// - The fs.DirEntry argument is always a fastwalk.DirEntry, which has -// a Stat() method that returns the result of calling os.Stat() on the -// file. The result of Stat() may be cached. // - Multiple goroutines stat the filesystem concurrently. The provided // walkFn must be safe for concurrent use. -// - Walk can follow symlinks if walkFn returns the ErrTraverseLink -// sentinel error. It is the walkFn's responsibility to prevent -// Walk from going into symlink cycles. +// +// - The order that directories are visited is non-deterministic. +// +// - File stat calls must be done by the user and should be done via +// the [DirEntry] argument to walkFn. The [DirEntry] caches the result +// of both Info() and Stat(). The Stat() method is a fastwalk specific +// extension and can be called by casting the [fs.DirEntry] to a +// [fastwalk.DirEntry] or via the [StatDirEntry] helper. The [fs.DirEntry] +// argument to walkFn will always be convertible to a [fastwalk.DirEntry]. +// +// - The [fs.DirEntry] argument is always a [fastwalk.DirEntry], which has +// a Stat() method that returns the result of calling [os.Stat] on the +// file. The results of Stat() and Info() are cached. The [StatDirEntry] +// helper can be used to call Stat() on the returned [fastwalk.DirEntry]. +// +// - Walk can follow symlinks in two ways: the first, and simplest, is to +// set the Follow [Config] option to true - this will cause Walk to follow +// symlinks and detect/ignore any symlink loops; the second, is for walkFn +// to return the sentinel [ErrTraverseLink] error. +// When using [ErrTraverseLink] to follow symlinks it is walkFn's +// responsibility to prevent Walk from going into symlink cycles. +// By default Walk does not follow symbolic links. 
+// +// - When walking a directory, walkFn will be called for each non-directory +// entry and directories will be enqueued and visited at a later time or +// by another goroutine. func Walk(conf *Config, root string, walkFn fs.WalkDirFunc) error { fi, err := os.Stat(root) if err != nil { @@ -253,7 +400,10 @@ func Walk(conf *Config, root string, walkFn fs.WalkDirFunc) error { } w := &walker{ - fn: walkFn, + fn: walkFn, + // TODO: Increase the size of enqueuec so that we don't stall + // while processing a directory. Increasing the size of workc + // doesn't help as much (needs more testing). enqueuec: make(chan walkItem, numWorkers), // buffered for performance workc: make(chan walkItem, numWorkers), // buffered for performance donec: make(chan struct{}), @@ -261,8 +411,10 @@ func Walk(conf *Config, root string, walkFn fs.WalkDirFunc) error { // buffered for correctness & not leaking goroutines: resc: make(chan error, numWorkers), - follow: conf.Follow, - toSlash: conf.ToSlash, + // TODO: we should just pass the Config + follow: conf.Follow, + toSlash: conf.ToSlash, + sortMode: conf.Sort, } if w.follow { w.ignoredDirs = append(w.ignoredDirs, fi) @@ -276,6 +428,8 @@ func Walk(conf *Config, root string, walkFn fs.WalkDirFunc) error { } root = cleanRootPath(root) + // NOTE: in BenchmarkFastWalk the size of todo averages around + // 170 and can be in the ~250 range at max. 
todo := []walkItem{{dir: root, info: fileInfoToDirEntry(filepath.Dir(root), fi)}} out := 0 for { @@ -291,6 +445,8 @@ func Walk(conf *Config, root string, walkFn fs.WalkDirFunc) error { todo = todo[:len(todo)-1] out++ case it := <-w.enqueuec: + // TODO: consider appending to todo directly and using a + // mutext this might help with contention around select todo = append(todo, it) case err := <-w.resc: out-- @@ -341,14 +497,15 @@ type walker struct { enqueuec chan walkItem // from workers resc chan error // from workers - ignoredDirs []os.FileInfo + ignoredDirs []fs.FileInfo follow bool toSlash bool + sortMode SortMode } type walkItem struct { dir string - info fs.DirEntry + info DirEntry callbackDone bool // callback already called; don't do it again } @@ -359,7 +516,7 @@ func (w *walker) enqueue(it walkItem) { } } -func (w *walker) shouldSkipDir(fi os.FileInfo) bool { +func (w *walker) shouldSkipDir(fi fs.FileInfo) bool { for _, ignored := range w.ignoredDirs { if os.SameFile(ignored, fi) { return true @@ -368,9 +525,8 @@ func (w *walker) shouldSkipDir(fi os.FileInfo) bool { return false } -func (w *walker) shouldTraverse(path string, de fs.DirEntry) bool { - // TODO: do we need to use filepath.EvalSymlinks() here? 
- ts, err := StatDirEntry(path, de) +func (w *walker) shouldTraverse(path string, de DirEntry) bool { + ts, err := de.Stat() if err != nil { return false } @@ -405,13 +561,14 @@ func (w *walker) joinPaths(dir, base string) string { } return dir + "/" + base } + // TODO: handle the above case of the argument to Walk being "/" if w.toSlash { return dir + "/" + base } return dir + string(os.PathSeparator) + base } -func (w *walker) onDirEnt(dirName, baseName string, de fs.DirEntry) error { +func (w *walker) onDirEnt(dirName, baseName string, de DirEntry) error { joined := w.joinPaths(dirName, baseName) typ := de.Type() if typ == os.ModeDir { @@ -442,7 +599,7 @@ func (w *walker) onDirEnt(dirName, baseName string, de fs.DirEntry) error { return err } -func (w *walker) walk(root string, info fs.DirEntry, runUserCallback bool) error { +func (w *walker) walk(root string, info DirEntry, runUserCallback bool) error { if runUserCallback { err := w.fn(root, info, nil) if err == filepath.SkipDir { @@ -453,7 +610,7 @@ func (w *walker) walk(root string, info fs.DirEntry, runUserCallback bool) error } } - err := readDir(root, w.onDirEnt) + err := w.readDir(root) if err != nil { // Second call, to report ReadDir error. return w.fn(root, info, err) @@ -472,3 +629,17 @@ func cleanRootPath(root string) string { } return root } + +// Avoid the dependency on strconv since it pulls in a large number of other +// dependencies which bloats the size of this package. 
+func itoa(val uint64) string { + buf := make([]byte, 20) + i := len(buf) - 1 + for val >= 10 { + buf[i] = byte(val%10 + '0') + i-- + val /= 10 + } + buf[i] = byte(val + '0') + return string(buf[i:]) +} diff --git a/fastwalk_darwin.go b/fastwalk_darwin.go index f7e31de..107e91f 100644 --- a/fastwalk_darwin.go +++ b/fastwalk_darwin.go @@ -3,16 +3,12 @@ package fastwalk import ( - "io/fs" "os" "syscall" "unsafe" ) -//sys closedir(dir uintptr) (err error) -//sys readdir_r(dir uintptr, entry *Dirent, result **Dirent) (res Errno) - -func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) error) (err error) { +func (w *walker) readDir(dirName string) (err error) { var fd uintptr for { fd, err = opendir(dirName) @@ -25,6 +21,12 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er } defer closedir(fd) //nolint:errcheck + var p *[]*unixDirent + if w.sortMode != SortNone { + p = direntSlicePool.Get().(*[]*unixDirent) + } + defer putDirentSlice(p) + skipFiles := false var dirent syscall.Dirent var entptr *syscall.Dirent @@ -66,14 +68,36 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er continue } nm := string(name) - if err := fn(dirName, nm, newUnixDirent(dirName, nm, typ)); err != nil { + de := newUnixDirent(dirName, nm, typ) + if w.sortMode == SortNone { + if err := w.onDirEnt(dirName, nm, de); err != nil { + if err != ErrSkipFiles { + return err + } + skipFiles = true + } + } else { + *p = append(*p, de) + } + } + if w.sortMode == SortNone { + return nil + } + + dents := *p + sortDirents(w.sortMode, dents) + for _, d := range dents { + d := d + if skipFiles && d.typ.IsRegular() { + continue + } + if err := w.onDirEnt(dirName, d.Name(), d); err != nil { if err != ErrSkipFiles { return err } skipFiles = true } } - return nil } diff --git a/fastwalk_darwin_test.go b/fastwalk_darwin_test.go deleted file mode 100644 index 769f330..0000000 --- a/fastwalk_darwin_test.go +++ /dev/null @@ -1,130 
+0,0 @@ -//go:build darwin && go1.13 - -package fastwalk - -import ( - "flag" - "io/fs" - "os" - "path/filepath" - "runtime" - "sort" - "strconv" - "testing" -) - -func TestDarwinReaddir(t *testing.T) { - wd, err := os.Getwd() - if err != nil { - t.Fatal(err) - } - want, err := os.ReadDir(wd) - if err != nil { - t.Fatal(err) - } - - rdEnts, err := os.ReadDir(wd) - if err != nil { - t.Fatal(err) - } - - var gdEnts []fs.DirEntry - err = readDir(wd, func(_, _ string, de fs.DirEntry) error { - gdEnts = append(gdEnts, de) - return nil - }) - if err != nil { - t.Fatal(err) - } - - sort.Slice(rdEnts, func(i, j int) bool { - return rdEnts[i].Name() < rdEnts[j].Name() - }) - sort.Slice(gdEnts, func(i, j int) bool { - return gdEnts[i].Name() < gdEnts[j].Name() - }) - - sameDirEntry := func(d1, d2 fs.DirEntry) bool { - if d1.Name() != d2.Name() || d1.IsDir() != d2.IsDir() || d1.Type() != d2.Type() { - return false - } - fi1, e1 := d1.Info() - fi2, e2 := d2.Info() - if e1 != e2 { - return false - } - return os.SameFile(fi1, fi2) - } - - for i := range want { - de := want[i] - re := rdEnts[i] - ge := gdEnts[i] - if !sameDirEntry(de, re) { - t.Errorf("Readir: %q: want: %#v get: %#v", de.Name(), de, re) - } - if !sameDirEntry(de, ge) { - t.Errorf("Getdirentries: %q: want: %#v get: %#v", de.Name(), de, ge) - } - } - if len(rdEnts) != len(want) { - t.Errorf("Readir returned %d entries want: %d", len(rdEnts), len(want)) - } - if len(gdEnts) != len(want) { - t.Errorf("Getdirentries returned %d entries want: %d", len(gdEnts), len(want)) - } -} - -var benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan for BenchmarkFastWalk") - -func noopReadDirFunc(_, _ string, _ fs.DirEntry) error { - return nil -} - -func benchmarkReadDir(b *testing.B, parallel bool, fn func(dirName string, fn func(dirName, entName string, de fs.DirEntry) error) error) { - mktemp := func(sz int) string { - dir := filepath.Join(b.TempDir(), strconv.Itoa(sz)) - if err := os.MkdirAll(dir, 0755); 
err != nil { - b.Fatal(err) - } - for i := 0; i < sz; i++ { - name := strconv.Itoa(i) - if err := os.WriteFile(filepath.Join(dir, name), []byte(name), 0644); err != nil { - b.Fatal(err) - } - } - return dir - } - sizes := []int{4, 8, 16, 32, 64, 128, 256} - for _, sz := range sizes { - dir := mktemp(sz) - b.Run(strconv.Itoa(sz), func(b *testing.B) { - if parallel { - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - fn(dir, noopReadDirFunc) - } - }) - } else { - for i := 0; i < b.N; i++ { - fn(dir, noopReadDirFunc) - } - } - }) - } -} - -func BenchmarkReadDir(b *testing.B) { - benchmarkReadDir(b, false, readDir) -} - -func BenchmarkReadDirParallel(b *testing.B) { - dirname := *benchDir - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - if err := readDir(dirname, noopReadDirFunc); err != nil { - b.Fatal(err) - } - } - }) -} diff --git a/fastwalk_portable.go b/fastwalk_portable.go index dbac5e1..6956c36 100644 --- a/fastwalk_portable.go +++ b/fastwalk_portable.go @@ -3,7 +3,6 @@ package fastwalk import ( - "io/fs" "os" ) @@ -11,7 +10,7 @@ import ( // It does not descend into directories or follow symlinks. // If fn returns a non-nil error, readDir returns with that error // immediately. 
-func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) error) error { +func (w *walker) readDir(dirName string) error { f, err := os.Open(dirName) if err != nil { return err @@ -22,6 +21,12 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er return readErr } + var p *[]DirEntry + if w.sortMode != SortNone { + p = direntSlicePool.Get().(*[]DirEntry) + } + defer putDirentSlice(p) + var skipFiles bool for _, d := range des { if skipFiles && d.Type().IsRegular() { @@ -29,13 +34,34 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er } // Need to use FileMode.Type().Type() for fs.DirEntry e := newDirEntry(dirName, d) - if err := fn(dirName, d.Name(), e); err != nil { + if w.sortMode == SortNone { + if err := w.onDirEnt(dirName, d.Name(), e); err != nil { + if err != ErrSkipFiles { + return err + } + skipFiles = true + } + } else { + *p = append(*p, e) + } + } + if w.sortMode == SortNone { + return readErr + } + + dents := *p + sortDirents(w.sortMode, dents) + for _, d := range dents { + d := d + if skipFiles && d.Type().IsRegular() { + continue + } + if err := w.onDirEnt(dirName, d.Name(), d); err != nil { if err != ErrSkipFiles { return err } skipFiles = true } } - return readErr } diff --git a/fastwalk_test.go b/fastwalk_test.go index 302c7d6..e75f6fa 100644 --- a/fastwalk_test.go +++ b/fastwalk_test.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "io/fs" + "math" "os" "path/filepath" "reflect" @@ -75,11 +76,11 @@ func symlink(t testing.TB, oldname, newname string) error { func cleanupOrLogTempDir(t *testing.T, tempdir string) { if e := recover(); e != nil { - t.Log("TMPDIR:", tempdir) + t.Log("TMPDIR:", filepath.ToSlash(tempdir)) t.Fatal(e) } if t.Failed() { - t.Log("TMPDIR:", tempdir) + t.Log("TMPDIR:", filepath.ToSlash(tempdir)) } else { os.RemoveAll(tempdir) } @@ -112,7 +113,9 @@ func testCreateFiles(t *testing.T, tempdir string, files map[string]string) { } } -func testFastWalkConf(t 
*testing.T, conf *fastwalk.Config, files map[string]string, callback fs.WalkDirFunc, want map[string]os.FileMode) { +func testFastWalkConf(t *testing.T, conf *fastwalk.Config, files map[string]string, + callback fs.WalkDirFunc, want map[string]os.FileMode) { + tempdir, err := os.MkdirTemp("", "test-fast-walk") if err != nil { t.Fatal(err) @@ -150,7 +153,9 @@ func testFastWalkConf(t *testing.T, conf *fastwalk.Config, files map[string]stri } } -func testFastWalk(t *testing.T, files map[string]string, callback fs.WalkDirFunc, want map[string]os.FileMode) { +func testFastWalk(t *testing.T, files map[string]string, + callback fs.WalkDirFunc, want map[string]os.FileMode) { + testFastWalkConf(t, nil, files, callback, want) } @@ -213,6 +218,9 @@ func maxFileNameLength(t testing.TB) int { // This test identified a "checkptr: converted pointer straddles multiple allocations" // error on darwin when getdirentries64 was used with the race-detector enabled. func TestFastWalk_LongFileName(t *testing.T) { + // Test is slow since we need to find the longest allowed filename + t.Parallel() + maxNameLen := maxFileNameLength(t) if maxNameLen > 255 { maxNameLen = 255 @@ -268,8 +276,9 @@ func maxPathLength(t testing.TB) (root string, pathMax int) { var w strings.Builder w.Grow(n + 1) w.WriteString(base) - for w.Len() < n-32 { - w.WriteString("/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") + elem := "/" + strings.Repeat("a", 127) // path element + for w.Len() < n-len(elem) { + w.WriteString(elem) } for w.Len() < n { w.WriteByte('b') @@ -321,9 +330,13 @@ func maxPathLength(t testing.TB) (root string, pathMax int) { // Test that we can handle PATH_MAX. This is mostly for the Unix tests // where we pass a buffer to ReadDirect (often getdents64(2)). 
func TestFastWalk_LongPath(t *testing.T) { + // Test is slow since we need to find the longest allowed file path + t.Parallel() + if runtime.GOOS == "windows" { t.Skip("test not needed on Windows") } + root, pathMax := maxPathLength(t) t.Log("PATH_MAX:", pathMax) @@ -361,7 +374,7 @@ func TestFastWalk_LongPath(t *testing.T) { // Don't print the delta here since it might be very large. Instead // write it to two temp files in a directory that is not removed on // test exit so that the user can compare them themselves. - tempdir, err := os.MkdirTemp("", "test-fast-walk") + tempdir, err := os.MkdirTemp("", "fastwalk-test-*") if err != nil { t.Error(err) } @@ -432,60 +445,100 @@ func TestFastWalk_DirEntryType(t *testing.T) { } func TestFastWalk_SkipDir(t *testing.T) { - testFastWalk(t, map[string]string{ - "foo/foo.go": "one", - "bar/bar.go": "two", - "skip/skip.go": "skip", - }, - func(path string, de fs.DirEntry, err error) error { - requireNoError(t, err) - typ := de.Type().Type() - if typ == os.ModeDir && strings.HasSuffix(path, "skip") { - return filepath.SkipDir - } - return nil + test := func(t *testing.T, mode fastwalk.SortMode) { + conf := fastwalk.DefaultConfig.Copy() + conf.Sort = mode + testFastWalkConf(t, conf, map[string]string{ + "foo/foo.go": "one", + "bar/bar.go": "two", + "skip/skip.go": "skip", }, - map[string]os.FileMode{ - "": os.ModeDir, - "/src": os.ModeDir, - "/src/bar": os.ModeDir, - "/src/bar/bar.go": 0, - "/src/foo": os.ModeDir, - "/src/foo/foo.go": 0, - "/src/skip": os.ModeDir, + func(path string, de fs.DirEntry, err error) error { + requireNoError(t, err) + typ := de.Type().Type() + if typ == os.ModeDir && strings.HasSuffix(path, "skip") { + return filepath.SkipDir + } + return nil + }, + map[string]os.FileMode{ + "": os.ModeDir, + "/src": os.ModeDir, + "/src/bar": os.ModeDir, + "/src/bar/bar.go": 0, + "/src/foo": os.ModeDir, + "/src/foo/foo.go": 0, + "/src/skip": os.ModeDir, + }) + } + + // Test that sorting respects filepath.SkipDir +
for _, mode := range []fastwalk.SortMode{ + fastwalk.SortNone, + fastwalk.SortLexical, + fastwalk.SortDirsFirst, + fastwalk.SortFilesFirst, + } { + t.Run(mode.String(), func(t *testing.T) { + test(t, mode) }) + } } func TestFastWalk_SkipFiles(t *testing.T) { - // Directory iteration order is undefined, so there's no way to know - // which file to expect until the walk happens. Rather than mess - // with the test infrastructure, just mutate want. - var mu sync.Mutex - want := map[string]os.FileMode{ - "": os.ModeDir, - "/src": os.ModeDir, - "/src/zzz": os.ModeDir, - "/src/zzz/c.go": 0, + mapKeys := func(m map[string]os.FileMode) []string { + a := make([]string, 0, len(m)) + for k := range m { + a = append(a, k) + } + return a + } + + test := func(t *testing.T, mode fastwalk.SortMode) { + // Directory iteration order is undefined, so there's no way to know + // which file to expect until the walk happens. Rather than mess + // with the test infrastructure, just mutate want. + want := map[string]os.FileMode{ + "": os.ModeDir, + "/src": os.ModeDir, + "/src/zzz": os.ModeDir, + "/src/zzz/c.go": 0, + } + conf := fastwalk.DefaultConfig.Copy() + conf.Sort = mode + var mu sync.Mutex + testFastWalkConf(t, conf, map[string]string{ + "a_skipfiles.go": "a", + "b_skipfiles.go": "b", + "zzz/c.go": "c", + }, + func(path string, _ fs.DirEntry, err error) error { + requireNoError(t, err) + if strings.HasSuffix(path, "_skipfiles.go") { + mu.Lock() + defer mu.Unlock() + want["/src/"+filepath.Base(path)] = 0 + return fastwalk.ErrSkipFiles + } + return nil + }, + want) + if len(want) != 5 { + t.Errorf("invalid number of files visited: wanted 5, got %v (%q)", + len(want), mapKeys(want)) + } } - testFastWalk(t, map[string]string{ - "a_skipfiles.go": "a", - "b_skipfiles.go": "b", - "zzz/c.go": "c", - }, - func(path string, _ fs.DirEntry, err error) error { - requireNoError(t, err) - if strings.HasSuffix(path, "_skipfiles.go") { - mu.Lock() - defer mu.Unlock() - 
want["/src/"+filepath.Base(path)] = 0 - return fastwalk.ErrSkipFiles - } - return nil - }, - want) - if len(want) != 5 { - t.Errorf("saw too many files: wanted 5, got %v (%v)", len(want), want) + // Test that sorting respects fastwalk.ErrSkipFiles + for _, mode := range []fastwalk.SortMode{ + fastwalk.SortNone, + fastwalk.SortLexical, + fastwalk.SortDirsFirst, + fastwalk.SortFilesFirst, + } { + t.Run(mode.String(), func(t *testing.T) { + test(t, mode) + }) } } @@ -622,7 +675,7 @@ func TestFastWalk_Follow_SkipDir(t *testing.T) { } func TestFastWalk_Follow_SymlinkLoop(t *testing.T) { - tempdir, err := os.MkdirTemp("", "test-fast-walk") + tempdir, err := os.MkdirTemp("", "fastwalk-test-*") if err != nil { t.Fatal(err) } @@ -727,7 +780,7 @@ func TestFastWalk_ErrNotExist(t *testing.T) { func TestFastWalk_ErrPermission(t *testing.T) { if runtime.GOOS == "windows" { - t.Skip("test not-supported for Windows") + t.Skip("test not supported for Windows") } tempdir := t.TempDir() want := map[string]os.FileMode{ @@ -757,6 +810,9 @@ func TestFastWalk_ErrPermission(t *testing.T) { if err := os.Remove(filename); err != nil { t.Error(err) } + if err := os.Chmod(dirname, 0755); err != nil { + t.Log(err) + } if err := os.Remove(dirname); err != nil { t.Error(err) } @@ -827,6 +883,149 @@ func TestFastWalk_ToSlash(t *testing.T) { } } +func TestFastWalk_SortMode(t *testing.T) { + // Can only assert on files since the order that directories are + // traversed is non-deterministic. 
+ + tmp, err := os.MkdirTemp("", "test-fast-walk") + if err != nil { + t.Fatal(err) + } + defer cleanupOrLogTempDir(t, tmp) + + want := []string{ + "a.txt", "b.txt", "c.txt", "d.txt", "e.txt", "f.txt", + "a.lnk", "b.lnk", "c.lnk", "d.lnk", "e.lnk", "f.lnk", + } + for _, name := range want { + path := filepath.Join(tmp, name) + if strings.HasSuffix(name, ".txt") { + if err := writeFile(path, "data", 0666); err != nil { + t.Fatal(err) + } + } else { + if err := symlink(t, path, path); err != nil { + t.Fatal(err) + } + } + } + + for _, mode := range []fastwalk.SortMode{ + fastwalk.SortLexical, + fastwalk.SortFilesFirst, + // We don't actually have any dirs because the order + // they're visited is non-deterministic. + fastwalk.SortDirsFirst, + } { + t.Run(mode.String(), func(t *testing.T) { + want := append([]string(nil), want...) + if mode == fastwalk.SortLexical { + sort.Strings(want) + } + + conf := fastwalk.Config{ + Sort: mode, + } + // We technically don't need a mutex since we're visiting + // only one directory, but use it for correctness. 
+ var mu sync.Mutex + var got []string + fastwalk.Walk(&conf, tmp, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + // Ignore the parent directory + if !d.IsDir() { + mu.Lock() + got = append(got, d.Name()) + mu.Unlock() + } + return nil + }) + if !reflect.DeepEqual(got, want) { + t.Errorf("Invalid output\ngot: %q\nwant: %q", got, want) + } + }) + } +} + +func TestSortModeString(t *testing.T) { + tests := []struct { + mode fastwalk.SortMode + want string + }{ + {fastwalk.SortNone, "None"}, + {fastwalk.SortLexical, "Lexical"}, + {fastwalk.SortDirsFirst, "DirsFirst"}, + {fastwalk.SortFilesFirst, "FilesFirst"}, + {100, "SortMode(100)"}, + {math.MaxUint32, fmt.Sprintf("SortMode(%d)", math.MaxUint32)}, + } + for _, test := range tests { + got := test.mode.String() + if got != test.want { + t.Errorf("%d: got: %s want: %s", test.mode, got, test.want) + } + } +} + +func TestConfigCopy(t *testing.T) { + t.Run("Nil", func(t *testing.T) { + c := (*fastwalk.Config)(nil).Copy() + if c == nil { + t.Fatal("failed to copy nil config") + } + if *c != (fastwalk.Config{}) { + t.Fatal("copy of nil config should be empty") + } + }) + t.Run("Copy", func(t *testing.T) { + a := fastwalk.DefaultConfig + c := a.Copy() + c.NumWorkers *= 2 + if a.NumWorkers == c.NumWorkers { + t.Fatal("failed to copy config") + } + }) +} + +func TestFastWalkJoinPaths(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("not supported on Windows") + } + if abs, err := filepath.Abs("/"); err != nil || abs != "/" { + t.Skipf(`skipping filepath.Abs("/") = %q, %v; want: "/", nil`, abs, err) + } + sentinel := errors.New("halt now") + var root string + var once sync.Once + err := fastwalk.Walk(nil, "///", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + once.Do(func() { + root = path + }) + return sentinel + }) + if err != nil && err != sentinel { + t.Fatal(err) + } + if root != "/" { + t.Fatalf(`failed to convert root "///" to "/" 
got: %q`, root) + } +} + +func BenchmarkSortModeString(b *testing.B) { + var s string + for i := 0; i < b.N; i++ { + s = fastwalk.SortMode(10).String() + } + if b.Failed() { + b.Log(s) + } +} + func diffFileModes(t *testing.T, got, want map[string]os.FileMode) { type Mode struct { Name string @@ -919,6 +1118,23 @@ func BenchmarkFastWalk(b *testing.B) { benchmarkFastWalk(b, nil, nil) } +func BenchmarkFastWalkSort(b *testing.B) { + for _, mode := range []fastwalk.SortMode{ + fastwalk.SortNone, + fastwalk.SortLexical, + fastwalk.SortDirsFirst, + fastwalk.SortFilesFirst, + } { + b.Run(mode.String(), func(b *testing.B) { + conf := fastwalk.DefaultConfig.Copy() + conf.Sort = mode + benchmarkFastWalk(b, conf, func(x fs.WalkDirFunc) fs.WalkDirFunc { + return noopWalkFunc + }) + }) + } +} + func BenchmarkFastWalkFollow(b *testing.B) { benchmarkFastWalk(b, &fastwalk.Config{Follow: true}, nil) } diff --git a/fastwalk_unix.go b/fastwalk_unix.go index 67a6d97..69f2e0b 100644 --- a/fastwalk_unix.go +++ b/fastwalk_unix.go @@ -7,7 +7,6 @@ package fastwalk import ( - "io/fs" "os" "syscall" @@ -21,13 +20,19 @@ const blockSize = 8192 // value used to represent a syscall.DT_UNKNOWN Dirent.Type. const unknownFileMode os.FileMode = ^os.FileMode(0) -func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) error) error { +func (w *walker) readDir(dirName string) error { fd, err := open(dirName, 0, 0) if err != nil { return &os.PathError{Op: "open", Path: dirName, Err: err} } defer syscall.Close(fd) + var p *[]*unixDirent + if w.sortMode != SortNone { + p = direntSlicePool.Get().(*[]*unixDirent) + } + defer putDirentSlice(p) + // The buffer must be at least a block long. 
buf := make([]byte, blockSize) // stack-allocated; doesn't escape bufp := 0 // starting read position in buf @@ -41,7 +46,7 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er return os.NewSyscallError("readdirent", err) } if nbuf <= 0 { - return nil + break // exit loop } } consumed, name, typ := dirent.Parse(buf[bufp:nbuf]) @@ -68,14 +73,37 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er continue } de := newUnixDirent(dirName, name, typ) - if err := fn(dirName, name, de); err != nil { - if err == ErrSkipFiles { - skipFiles = true - continue + if w.sortMode == SortNone { + if err := w.onDirEnt(dirName, name, de); err != nil { + if err == ErrSkipFiles { + skipFiles = true + continue + } + return err + } + } else { + *p = append(*p, de) + } + } + if w.sortMode == SortNone { + return nil + } + + dents := *p + sortDirents(w.sortMode, dents) + for _, d := range dents { + d := d + if skipFiles && d.typ.IsRegular() { + continue + } + if err := w.onDirEnt(dirName, d.Name(), d); err != nil { + if err != ErrSkipFiles { + return err } - return err + skipFiles = true } } + return nil } // According to https://golang.org/doc/go1.14#runtime diff --git a/zsyscall_darwin.go b/zsyscall_darwin.go index aef3752..cba92d3 100644 --- a/zsyscall_darwin.go +++ b/zsyscall_darwin.go @@ -44,7 +44,9 @@ func opendir(path string) (dir uintptr, err error) { // We implent opendir so that we don't have to open a file, duplicate // it's FD, then call fdopendir with it. - var buf [1024]byte // Tested by TestFastWalk_LongPath + const maxPath = len(syscall.Dirent{}.Name) // Tested by TestFastWalk_LongPath + + var buf [maxPath]byte if len(path) >= len(buf) { return 0, errEINVAL }