From 5d01bf74ab44bab65568dea020516cadb4c9d321 Mon Sep 17 00:00:00 2001 From: Leandro Motta Barros Date: Thu, 14 Jul 2022 19:30:03 -0300 Subject: [PATCH 1/2] Ignore missing config.v2.json files on migration We have seen some cases in which the file `/var/lib/docker/containers//config.v2.json` was not found for certain UUIDs. This causes the whole migration to fail and, worse, causes our migration cleanup code to fail. A missing `config.v2.json` indicates that directory does not contain a valid container, so we should not even try to migrate them. That's what this commit does: it makes the migration ignore directories with a missing `config.v2.json`. I don't think we should have directories like this in the first place, this is probably the side effect of some other issue. That said, Docker itself ignores these directories (after logging a warning) during startup, so here are just bringing the Engine in line with the standard behavior. Signed-off-by: Leandro Motta Barros Change-type: patch --- pkg/storagemigration/migrate.go | 8 ++++++-- pkg/storagemigration/utils.go | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pkg/storagemigration/migrate.go b/pkg/storagemigration/migrate.go index 8a9c46974e..def78198cc 100644 --- a/pkg/storagemigration/migrate.go +++ b/pkg/storagemigration/migrate.go @@ -336,10 +336,14 @@ func SwitchAllContainersStorageDriver(root, newStorageDriver string) error { logrus.Debugf("migrating %v container(s) to %s", len(containerIDs), newStorageDriver) for _, containerID := range containerIDs { err := switchContainerStorageDriver(root, containerID, newStorageDriver) - if err != nil { + switch err { + case nil: + logrus.WithField("container_id", containerID).Debugf("reconfigured storage-driver to %s", newStorageDriver) + case errNoConfigV2JSON: + logrus.WithField("container_id", containerID).Warning("ignoring container without config.v2.json") + default: return fmt.Errorf("Error rewriting container config for %s: %v", containerID, err) } - logrus.WithField("container_id", containerID).Debugf("reconfigured storage-driver to %s", newStorageDriver) } return nil } diff --git a/pkg/storagemigration/utils.go b/pkg/storagemigration/utils.go index 4ce0fe6690..bf3ff945d5 100644 --- a/pkg/storagemigration/utils.go +++ b/pkg/storagemigration/utils.go @@ -2,6 +2,7 @@ package storagemigration import ( "encoding/json" + "errors" "os" "path/filepath" @@ -11,6 +12,8 @@ import ( "golang.org/x/sys/unix" ) +var errNoConfigV2JSON = errors.New("config.v2.json not found for container") + // exists checks if a file (or if isDir is set to "true" a directory) exists func exists(path string, isDir bool) (bool, error) { fi, err := os.Stat(path) @@ -60,6 +63,10 @@ func replicate(sourceDir, targetDir string) error { // this is the only change needed to make it work after the migration func switchContainerStorageDriver(root, containerID, newStorageDriver string) error { containerConfigPath := filepath.Join(root, "containers", containerID, "config.v2.json") + if ok, _ := exists(containerConfigPath, false); !ok { + return errNoConfigV2JSON + } + f, err := os.OpenFile(containerConfigPath, os.O_RDWR, os.ModePerm) if err != nil { return err From 3915950592fcef45d1270ee7d2debd46dea09957 Mon Sep 17 00:00:00 2001 From: zoobot Date: Fri, 11 Nov 2022 15:52:39 -0800 Subject: [PATCH 2/2] Rollback to aufs and cleanup overlay2 in case of missing config.v2.json Add test for missing config.v2.json Changelog-entry: Rollback to aufs and cleanup overlay2 in case of missing config.v2.json Change-type: patch Signed-off-by: zoobot --- pkg/storagemigration/failcleanup.go | 1 - pkg/storagemigration/migrate.go | 6 +++-- pkg/storagemigration/migrate_test.go | 35 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pkg/storagemigration/failcleanup.go b/pkg/storagemigration/failcleanup.go index 5e61045d71..5d24c157be 100644 --- a/pkg/storagemigration/failcleanup.go +++ b/pkg/storagemigration/failcleanup.go @@ -10,7 +10,6 @@ import ( // FailCleanup should be run after a failed migration. // It will remove any files left over from the migration process // and migrate containers back to aufs. -// func failCleanup(root string) error { logrus.WithField("storage_root", root).Warning("recovering from failed aufs to overlay migration") diff --git a/pkg/storagemigration/migrate.go b/pkg/storagemigration/migrate.go index def78198cc..b37c77f440 100644 --- a/pkg/storagemigration/migrate.go +++ b/pkg/storagemigration/migrate.go @@ -128,7 +128,6 @@ func Migrate(root string) (err error) { if err != nil { return fmt.Errorf("Error migrating containers to overlay2: %v", err) } - return nil } @@ -340,7 +339,10 @@ func SwitchAllContainersStorageDriver(root, newStorageDriver string) error { case nil: logrus.WithField("container_id", containerID).Debugf("reconfigured storage-driver to %s", newStorageDriver) case errNoConfigV2JSON: - logrus.WithField("container_id", containerID).Warning("ignoring container without config.v2.json") + if newStorageDriver == "overlay2" { + return fmt.Errorf("Error containerID %s: %v", containerID, err) + } + logrus.WithField("container_id", containerID).Errorf("has no config.v2.json %s", newStorageDriver) default: return fmt.Errorf("Error rewriting container config for %s: %v", containerID, err) } diff --git a/pkg/storagemigration/migrate_test.go b/pkg/storagemigration/migrate_test.go index f041f70352..9c11b0f580 100644 --- a/pkg/storagemigration/migrate_test.go +++ b/pkg/storagemigration/migrate_test.go @@ -41,6 +41,7 @@ func setup(t *testing.T) (*fs.Dir, *State, func()) { ), ), ) + state := &State{ Layers: []Layer{ { @@ -132,3 +133,37 @@ func TestFailCleanup(t *testing.T) { _, err = os.Stat(root.Join("aufs")) assert.NilError(t, err) } + +func TestFailCleanupContainer(t *testing.T) { + root, _, cleanup := setup(t) + defer cleanup() + + // delete config.v2.json to force SwitchAllContainersStorageDriver to fail + os.Remove(root.Join("containers", "bebe92422caf828ab21ae39974a0c003a29970ec09c6e5529bbb24f71eb9ca2ef", "config.v2.json")) + + err := Migrate(root.Path()) + if err == nil { + // won't err on rollback to aufs fail + assert.NilError(t, err) + } else { + // only errs on migration to overlay2 fail + assert.ErrorContains(t, err, "bebe92422caf828ab21ae39974a0c003a29970ec09c6e5529bbb24f71eb9ca2ef") + + // // config.v2.json should not exists + _, err = os.Stat(root.Join("containers", "bebe92422caf828ab21ae39974a0c003a29970ec09c6e5529bbb24f71eb9ca2ef", "config.v2.json")) + assert.ErrorType(t, err, os.IsNotExist) + + // migration logfile should exists + _, err = os.Stat(root.Join("migrate.log")) + assert.NilError(t, err) + + // overlay2 directory should not exists + _, err = os.Stat(root.Join("overlay2")) + println("err", root.Join("overlay2"), err) + assert.ErrorType(t, err, os.IsNotExist) + + // aufs directory should still exists + _, err = os.Stat(root.Join("aufs")) + assert.NilError(t, err) + } +}