diff --git a/changelogs/unreleased/8449-kaovilai b/changelogs/unreleased/8449-kaovilai new file mode 100644 index 0000000000..34f4f9f155 --- /dev/null +++ b/changelogs/unreleased/8449-kaovilai @@ -0,0 +1 @@ +pkg/archive: Use map of long names avoiding max path limits diff --git a/internal/delete/delete_item_action_handler.go b/internal/delete/delete_item_action_handler.go index ba242c0ca0..120049ec45 100644 --- a/internal/delete/delete_item_action_handler.go +++ b/internal/delete/delete_item_action_handler.go @@ -59,7 +59,7 @@ func InvokeDeleteActions(ctx *Context) error { } // get items out of backup tarball into a temp directory - dir, err := archive.NewExtractor(ctx.Log, ctx.Filesystem).UnzipAndExtractBackup(ctx.BackupReader) + dir, longNames, err := archive.NewExtractor(ctx.Log, ctx.Filesystem).UnzipAndExtractBackup(ctx.BackupReader) if err != nil { return errors.Wrapf(err, "error extracting backup") } @@ -71,7 +71,7 @@ func InvokeDeleteActions(ctx *Context) error { ctx.Log.Debugf("Downloaded and extracted the backup file to: %s", dir) - backupResources, err := archive.NewParser(ctx.Log, ctx.Filesystem).Parse(dir) + backupResources, err := archive.NewParser(ctx.Log, ctx.Filesystem).Parse(dir, longNames) if existErr := errors.Is(err, archive.ErrNotExist); existErr { ctx.Log.Debug("ignore invoking delete item actions: ", err) return nil diff --git a/pkg/archive/extractor.go b/pkg/archive/extractor.go index 87d9ab413c..fd19d5dd8a 100644 --- a/pkg/archive/extractor.go +++ b/pkg/archive/extractor.go @@ -19,6 +19,8 @@ package archive import ( "archive/tar" "compress/gzip" + "crypto/sha256" + "fmt" "io" "path/filepath" @@ -42,11 +44,11 @@ func NewExtractor(log logrus.FieldLogger, fs filesystem.Interface) *Extractor { } // UnzipAndExtractBackup extracts a reader on a gzipped tarball to a local temp directory -func (e *Extractor) UnzipAndExtractBackup(src io.Reader) (string, error) { +func (e *Extractor) UnzipAndExtractBackup(src io.Reader) (string, map[string]string, 
error) { gzr, err := gzip.NewReader(src) if err != nil { e.log.Infof("error creating gzip reader: %v", err) - return "", err + return "", nil, err } defer gzr.Close() @@ -66,13 +68,16 @@ func (e *Extractor) writeFile(target string, tarRdr *tar.Reader) error { return nil } -func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) { +const maxPathLength = 255 // could be longer on some filesystems, but 255 is a commonly supported limit. + +// returns tempdir containing backup contents, a map of sha256(name)->original long name for shortened paths, and error +func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, map[string]string, error) { dir, err := e.fs.TempDir("", "") if err != nil { e.log.Infof("error creating temp dir: %v", err) - return "", err + return "", nil, err } - + longNames := make(map[string]string) for { header, err := tarRdr.Next() @@ -81,17 +86,23 @@ func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) { } if err != nil { e.log.Infof("error reading tar: %v", err) - return "", err + return "", nil, err } target := filepath.Join(dir, header.Name) //nolint:gosec // Internal usage. No need to check. - + // If the target is longer than maxPathLength, we'll use the sha256 of the header name as the filename. 
+ // https://github.com/vmware-tanzu/velero/issues/8434 + if len(target) > maxPathLength { + shortSha256Name := fmt.Sprintf("%x", sha256.Sum256([]byte(header.Name))) // sha256 name is 64 characters + longNames[shortSha256Name] = header.Name + target = filepath.Join(dir, shortSha256Name) + } switch header.Typeflag { case tar.TypeDir: err := e.fs.MkdirAll(target, header.FileInfo().Mode()) if err != nil { e.log.Infof("mkdirall error: %v", err) - return "", err + return "", nil, err } case tar.TypeReg: @@ -99,16 +110,15 @@ func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) { err := e.fs.MkdirAll(filepath.Dir(target), header.FileInfo().Mode()) if err != nil { e.log.Infof("mkdirall error: %v", err) - return "", err + return "", nil, err } // create the file if err := e.writeFile(target, tarRdr); err != nil { e.log.Infof("error copying: %v", err) - return "", err + return "", nil, err } } } - - return dir, nil + return dir, longNames, nil } diff --git a/pkg/archive/extractor_test.go b/pkg/archive/extractor_test.go index 47cea734c4..48ee00b714 100644 --- a/pkg/archive/extractor_test.go +++ b/pkg/archive/extractor_test.go @@ -18,11 +18,15 @@ package archive import ( "archive/tar" + "bytes" "compress/gzip" "io" "os" + "path/filepath" + "strings" "testing" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" "github.com/vmware-tanzu/velero/pkg/test" @@ -75,7 +79,7 @@ func TestUnzipAndExtractBackup(t *testing.T) { file, err := ext.fs.OpenFile(fileName, os.O_RDWR|os.O_CREATE, 0644) require.NoError(t, err) - _, err = ext.UnzipAndExtractBackup(file.(io.Reader)) + _, _, err = ext.UnzipAndExtractBackup(file.(io.Reader)) if tc.wantErr && (err == nil) { t.Errorf("%s: wanted error but got nil", tc.name) } @@ -149,3 +153,51 @@ func createRegular(fs filesystem.Interface) (string, error) { return outName, nil } + +func TestReadBackupWithLongFilenames(t *testing.T) { + log := logrus.New() + fs := test.NewFakeFileSystem() + e := NewExtractor(log, fs) + + // 
Create a tar reader with a file that has a very long name + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + + // Create a filename longer than maxPathLength + longFilename := strings.Repeat("a", maxPathLength+10) + ".txt" + content := []byte("test content") + + hdr := &tar.Header{ + Name: longFilename, + Mode: 0600, + Size: int64(len(content)), + Typeflag: tar.TypeReg, + } + + if err := tw.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err := tw.Write(content); err != nil { + t.Fatal(err) + } + tw.Close() + + // Read the backup + dir, longNames, err := e.readBackup(tar.NewReader(&buf)) + + // Verify results + require.NoError(t, err) + require.NotEmpty(t, dir) + require.NotEmpty(t, longNames) + + // Verify that a shortened SHA256 name was created and mapped correctly + found := false + for shortName, originalName := range longNames { + if originalName == longFilename { + found = true + // Verify the short name length is within limits + require.LessOrEqual(t, len(filepath.Join(dir, shortName)), maxPathLength) + } + } + require.True(t, found, "Long filename mapping not found") +} diff --git a/pkg/archive/parser.go b/pkg/archive/parser.go index 166e03114d..4befee6574 100644 --- a/pkg/archive/parser.go +++ b/pkg/archive/parser.go @@ -64,7 +64,7 @@ func NewParser(log logrus.FieldLogger, fs filesystem.Interface) *Parser { // Parse reads an extracted backup on the file system and returns // a structured catalog of the resources and items contained within it. -func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) { +func (p *Parser) Parse(dir string, longNames map[string]string) (map[string]*ResourceItems, error) { // ensure top-level "resources" directory exists, and read subdirectories // of it, where each one is expected to correspond to a resource. 
resourcesDir := filepath.Join(dir, velerov1api.ResourcesDir) @@ -81,9 +81,8 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) { p.log.Warnf("Ignoring unexpected file %q in directory %q", resourceDir.Name(), strings.TrimPrefix(resourcesDir, dir+"/")) continue } - resourceItems := &ResourceItems{ - GroupResource: resourceDir.Name(), + GroupResource: itemNameIfHasLongName(resourceDir.Name(), longNames), ItemsByNamespace: map[string][]string{}, } @@ -95,7 +94,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) { return nil, errors.Wrapf(err, "error checking for existence of directory %q", strings.TrimPrefix(clusterScopedDir, dir+"/")) } if exists { - items, err := p.getResourceItemsForScope(clusterScopedDir, dir) + items, err := p.getResourceItemsForScope(clusterScopedDir, dir, longNames) if err != nil { return nil, err } @@ -124,7 +123,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) { continue } - items, err := p.getResourceItemsForScope(filepath.Join(namespaceScopedDir, namespaceDir.Name()), dir) + items, err := p.getResourceItemsForScope(filepath.Join(namespaceScopedDir, namespaceDir.Name()), dir, longNames) if err != nil { return nil, err } @@ -135,7 +134,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) { } } - resources[resourceDir.Name()] = resourceItems + resources[itemNameIfHasLongName(resourceDir.Name(), longNames)] = resourceItems } return resources, nil @@ -143,7 +142,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) { // getResourceItemsForScope returns the list of items with a namespace or // cluster-scoped subdirectory for a specific resource. 
-func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string) ([]string, error) { +func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string, longNames map[string]string) ([]string, error) { files, err := p.fs.ReadDir(dir) if err != nil { return nil, errors.Wrapf(err, "error reading contents of directory %q", strings.TrimPrefix(dir, archiveRootDir+"/")) @@ -156,12 +155,22 @@ func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string) ([]string, continue } - items = append(items, strings.TrimSuffix(file.Name(), ".json")) + items = append(items, itemNameIfHasLongName(strings.TrimSuffix(file.Name(), ".json"), longNames)) } return items, nil } +func itemNameIfHasLongName(itemName string, longNames map[string]string) string { + if longNames == nil { + return itemName + } + if longName, ok := longNames[itemName]; ok { + return longName + } + return itemName +} + // checkAndReadDir is a wrapper around fs.DirExists and fs.ReadDir that does checks // and returns errors if directory cannot be read. func (p *Parser) checkAndReadDir(dir string) ([]os.FileInfo, error) { @@ -183,7 +192,7 @@ func (p *Parser) checkAndReadDir(dir string) ([]os.FileInfo, error) { // ParseGroupVersions extracts the versions for each API Group from the backup // directory names and stores them in a metav1 APIGroup object. -func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, error) { +func (p *Parser) ParseGroupVersions(dir string, longNames map[string]string) (map[string]metav1.APIGroup, error) { resourcesDir := filepath.Join(dir, velerov1api.ResourcesDir) // Get the subdirectories inside the "resources" directory. The subdirectories @@ -197,8 +206,9 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err // Loop through the resource.group directory names. 
for _, rgd := range rgDirs { + rgdName := itemNameIfHasLongName(rgd.Name(), longNames) group := metav1.APIGroup{ - Name: extractGroupName(rgd.Name()), + Name: extractGroupName(rgdName), } rgdPath := filepath.Join(resourcesDir, rgd.Name()) @@ -213,7 +223,7 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err var supportedVersions []metav1.GroupVersionForDiscovery for _, gvd := range gvDirs { - gvdName := gvd.Name() + gvdName := itemNameIfHasLongName(gvd.Name(), longNames) // Don't save the namespaces or clusters directories in list of // supported API Group Versions. @@ -241,7 +251,7 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err group.Versions = supportedVersions - resourceAGs[rgd.Name()] = group + resourceAGs[rgdName] = group } return resourceAGs, nil diff --git a/pkg/archive/parser_test.go b/pkg/archive/parser_test.go index c9ad517480..9a31af0a4e 100644 --- a/pkg/archive/parser_test.go +++ b/pkg/archive/parser_test.go @@ -32,6 +32,7 @@ func TestParse(t *testing.T) { name string files []string dir string + longNames map[string]string wantErrMsg error want map[string]*ResourceItems }{ @@ -89,6 +90,59 @@ func TestParse(t *testing.T) { }, }, }, + { + name: "a mix of cluster-scoped and namespaced items across multiple resources are correctly returned", + dir: "root-dir", + longNames: map[string]string{ + "somesha256": "item-1", + "longapinamesha256": "thelongestapinameyouhaveeverseenever", + }, + files: []string{ + "root-dir/resources/widgets.foo/cluster/somesha256.json", + "root-dir/resources/widgets.foo/cluster/item-2.json", + "root-dir/resources/widgets.foo/namespaces/ns-1/item-1.json", + "root-dir/resources/widgets.foo/namespaces/ns-1/item-2.json", + "root-dir/resources/widgets.foo/namespaces/ns-2/item-1.json", + "root-dir/resources/longapinamesha256/namespaces/ns-2/item-2.json", + + "root-dir/resources/dongles.foo/cluster/item-3.json", + "root-dir/resources/dongles.foo/cluster/item-4.json", + + 
"root-dir/resources/dongles.bar/namespaces/ns-3/item-3.json", + "root-dir/resources/dongles.bar/namespaces/ns-3/item-4.json", + "root-dir/resources/dongles.bar/namespaces/ns-4/item-5.json", + "root-dir/resources/dongles.bar/namespaces/ns-4/item-6.json", + }, + want: map[string]*ResourceItems{ + "widgets.foo": { + GroupResource: "widgets.foo", + ItemsByNamespace: map[string][]string{ + "": {"item-2", "item-1"}, + "ns-1": {"item-1", "item-2"}, + "ns-2": {"item-1"}, + }, + }, + "thelongestapinameyouhaveeverseenever": { + GroupResource: "thelongestapinameyouhaveeverseenever", + ItemsByNamespace: map[string][]string{ + "ns-2": {"item-2"}, + }, + }, + "dongles.foo": { + GroupResource: "dongles.foo", + ItemsByNamespace: map[string][]string{ + "": {"item-3", "item-4"}, + }, + }, + "dongles.bar": { + GroupResource: "dongles.bar", + ItemsByNamespace: map[string][]string{ + "ns-3": {"item-3", "item-4"}, + "ns-4": {"item-5", "item-6"}, + }, + }, + }, + }, } for _, tc := range tests { @@ -108,7 +162,7 @@ func TestParse(t *testing.T) { } } - res, err := p.Parse(tc.dir) + res, err := p.Parse(tc.dir, tc.longNames) if tc.wantErrMsg != nil { assert.ErrorIs(t, err, tc.wantErrMsg, "Error should be: %v, got: %v", tc.wantErrMsg, err) } else { @@ -125,6 +179,7 @@ func TestParseGroupVersions(t *testing.T) { files []string backupDir string wantErrMsg error + longNames map[string]string want map[string]metav1.APIGroup }{ { @@ -203,6 +258,48 @@ func TestParseGroupVersions(t *testing.T) { }, }, }, + { + name: "when resource directories and files have long names, they are correctly mapped and parsed", + backupDir: "/var/folders", + longNames: map[string]string{ + "longhash123": "pods", + "longhash456": "deployments.apps", + "filehash789": "very-long-pod-name-that-exceeds-normal-length-limits-123456789", + "filehashABC": "another-long-deployment-name-with-many-characters-123456789", + }, + files: []string{ + 
"/var/folders/resources/longhash123/v1-preferredversion/namespaces/default/filehash789.json", + "/var/folders/resources/longhash456/v1-preferredversion/namespaces/default/filehashABC.json", + }, + want: map[string]metav1.APIGroup{ + "pods": { + Name: "", + Versions: []metav1.GroupVersionForDiscovery{ + { + GroupVersion: "v1", + Version: "v1", + }, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "v1", + Version: "v1", + }, + }, + "deployments.apps": { + Name: "apps", + Versions: []metav1.GroupVersionForDiscovery{ + { + GroupVersion: "apps/v1", + Version: "v1", + }, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "apps/v1", + Version: "v1", + }, + }, + }, + }, } for _, tc := range tests { @@ -222,7 +319,7 @@ func TestParseGroupVersions(t *testing.T) { } } - res, err := p.ParseGroupVersions(tc.backupDir) + res, err := p.ParseGroupVersions(tc.backupDir, tc.longNames) if tc.wantErrMsg != nil { assert.ErrorIs(t, err, tc.wantErrMsg, "Error should be: %v, got: %v", tc.wantErrMsg, err) } else { diff --git a/pkg/restore/prioritize_group_version.go b/pkg/restore/prioritize_group_version.go index e3a01c03db..54dabab011 100644 --- a/pkg/restore/prioritize_group_version.go +++ b/pkg/restore/prioritize_group_version.go @@ -155,7 +155,7 @@ func (ctx *restoreContext) gatherSourceTargetUserGroupVersions() ( map[string]metav1.APIGroup, error, ) { - sourceRGVersions, err := archive.NewParser(ctx.log, ctx.fileSystem).ParseGroupVersions(ctx.restoreDir) + sourceRGVersions, err := archive.NewParser(ctx.log, ctx.fileSystem).ParseGroupVersions(ctx.restoreDir, ctx.restoreDirLongNames) if err != nil { return nil, nil, nil, errors.Wrap(err, "parsing versions from directory names") } diff --git a/pkg/restore/restore.go b/pkg/restore/restore.go index 619db5b87e..6f7c4aa0e4 100644 --- a/pkg/restore/restore.go +++ b/pkg/restore/restore.go @@ -335,6 +335,7 @@ type restoreContext struct { backupReader io.Reader restore *velerov1api.Restore 
restoreDir string + restoreDirLongNames map[string]string resourceIncludesExcludes *collections.IncludesExcludes resourceStatusIncludesExcludes *collections.IncludesExcludes namespaceIncludesExcludes *collections.IncludesExcludes @@ -425,7 +426,7 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) { ctx.log.Infof("Starting restore of backup %s", kube.NamespaceAndName(ctx.backup)) - dir, err := archive.NewExtractor(ctx.log, ctx.fileSystem).UnzipAndExtractBackup(ctx.backupReader) + dir, longNames, err := archive.NewExtractor(ctx.log, ctx.fileSystem).UnzipAndExtractBackup(ctx.backupReader) if err != nil { ctx.log.Infof("error unzipping and extracting: %v", err) errs.AddVeleroError(err) @@ -455,8 +456,9 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) { // Need to set this for additionalItems to be restored. ctx.restoreDir = dir + ctx.restoreDirLongNames = longNames - backupResources, err := archive.NewParser(ctx.log, ctx.fileSystem).Parse(ctx.restoreDir) + backupResources, err := archive.NewParser(ctx.log, ctx.fileSystem).Parse(ctx.restoreDir, ctx.restoreDirLongNames) // If ErrNotExist occurs, it implies that the backup to be restored includes zero items. // Need to add a warning about it and jump out of the function. if errors.Cause(err) == archive.ErrNotExist {