Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

verify previous path state before backup #4630

Merged
merged 5 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed
- Handle OneDrive folders being deleted and recreated midway through a backup
- Automatically re-run a full delta query on incremental backups if the prior backup is found to have malformed prior-state information.

## [v0.15.0] (beta) - 2023-10-31

Expand Down
129 changes: 93 additions & 36 deletions src/internal/m365/collection/drive/collections.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,88 @@ func NewCollections(
}
}

func deserializeMetadata(
func deserializeAndValidateMetadata(
ctx context.Context,
cols []data.RestoreCollection,
fb *fault.Bus,
) (map[string]string, map[string]map[string]string, bool, error) {
deltas, prevs, canUse, err := DeserializeMetadata(ctx, cols)
if err != nil || !canUse {
return deltas, prevs, false, clues.Stack(err).OrNil()
}

// Go through and remove delta tokens if we didn't have any paths for them
// or one or more paths are empty (incorrect somehow). This will ensure we
// don't accidentally try to pull in delta results when we should have
// enumerated everything instead.
//
// Loop over the set of previous deltas because it's alright to have paths
// without a delta but not to have a delta without paths. This way ensures
// we check at least all the path sets for the deltas we have.
for drive := range deltas {
ictx := clues.Add(ctx, "drive_id", drive)

paths := prevs[drive]
if len(paths) == 0 {
logger.Ctx(ictx).Info("dropping drive delta due to 0 prev paths")
delete(deltas, drive)
ryanfkeepers marked this conversation as resolved.
Show resolved Hide resolved
}

// Drives have only a single delta token. If we find any folder that
// seems like the path is bad we need to drop the entire token and start
// fresh. Since we know the token will be gone we can also stop checking
// for other possibly incorrect folder paths.
for _, prevPath := range paths {
if len(prevPath) == 0 {
logger.Ctx(ictx).Info("dropping drive delta due to 0 len path")
delete(deltas, drive)

break
}
}
}

alertIfPrevPathsHaveCollisions(ctx, prevs, fb)

return deltas, prevs, canUse, nil
}

// alertIfPrevPathsHaveCollisions raises a non-fatal alert on fb for every pair
// of folders within a drive whose previous paths are identical. Collisions
// indicate malformed prior-state metadata; the alert surfaces them without
// failing the backup. Which folder lands in the "_1" vs "_2" slot depends on
// map iteration order and is not deterministic.
func alertIfPrevPathsHaveCollisions(
	ctx context.Context,
	prevs map[string]map[string]string,
	fb *fault.Bus,
) {
	for driveID, folders := range prevs {
		// seen maps each previous path to the first folder ID that claimed it.
		seen := map[string]string{}

		for fid, prev := range folders {
			if otherID, collision := seen[prev]; collision {
				// Use a per-collision context instead of reassigning ctx so
				// attributes from one collision don't linger on the context
				// used by later iterations.
				ictx := clues.Add(
					ctx,
					"collision_folder_id_1", fid,
					"collision_folder_id_2", otherID,
					"collision_drive_id", driveID,
					"collision_prev_path", path.LoggableDir(prev))

				fb.AddAlert(ictx, fault.NewAlert(
					fault.AlertPreviousPathCollision,
					"", // no namespace
					"", // no item id
					"previousPaths",
					map[string]any{
						"collision_folder_id_1": fid,
						"collision_folder_id_2": otherID,
						"collision_drive_id":    driveID,
						"collision_prev_path":   prev,
					}))
			}

			seen[prev] = fid
		}
	}
}

func DeserializeMetadata(
ctx context.Context,
cols []data.RestoreCollection,
) (map[string]string, map[string]map[string]string, bool, error) {
Expand Down Expand Up @@ -137,32 +218,6 @@ func deserializeMetadata(
}
}
}

// Go through and remove delta tokens if we didn't have any paths for them
// or one or more paths are empty (incorrect somehow). This will ensure we
// don't accidentally try to pull in delta results when we should have
// enumerated everything instead.
//
// Loop over the set of previous deltas because it's alright to have paths
// without a delta but not to have a delta without paths. This way ensures
// we check at least all the path sets for the deltas we have.
for drive := range prevDeltas {
paths := prevFolders[drive]
if len(paths) == 0 {
delete(prevDeltas, drive)
}

// Drives have only a single delta token. If we find any folder that
// seems like the path is bad we need to drop the entire token and start
// fresh. Since we know the token will be gone we can also stop checking
// for other possibly incorrect folder paths.
for _, prevPath := range paths {
if len(prevPath) == 0 {
delete(prevDeltas, drive)
break
}
}
}
}

// if reads from items failed, return empty but no error
Expand Down Expand Up @@ -215,7 +270,7 @@ func (c *Collections) Get(
ssmb *prefixmatcher.StringSetMatchBuilder,
errs *fault.Bus,
) ([]data.BackupCollection, bool, error) {
prevDriveIDToDelta, oldPrevPathsByDriveID, canUsePrevBackup, err := deserializeMetadata(ctx, prevMetadata)
deltasByDriveID, prevPathsByDriveID, canUsePrevBackup, err := deserializeAndValidateMetadata(ctx, prevMetadata, errs)
if err != nil {
return nil, false, err
}
Expand All @@ -224,7 +279,7 @@ func (c *Collections) Get(

driveTombstones := map[string]struct{}{}

for driveID := range oldPrevPathsByDriveID {
for driveID := range prevPathsByDriveID {
driveTombstones[driveID] = struct{}{}
}

Expand Down Expand Up @@ -257,8 +312,8 @@ func (c *Collections) Get(
"drive_name", clues.Hide(driveName))

excludedItemIDs = map[string]struct{}{}
oldPrevPaths = oldPrevPathsByDriveID[driveID]
prevDeltaLink = prevDriveIDToDelta[driveID]
oldPrevPaths = prevPathsByDriveID[driveID]
prevDeltaLink = deltasByDriveID[driveID]

// packagePaths is keyed by folder paths to a parent directory
// which is marked as a package by its driveItem GetPackage
Expand Down Expand Up @@ -426,6 +481,8 @@ func (c *Collections) Get(
collections = append(collections, coll)
}

alertIfPrevPathsHaveCollisions(ctx, driveIDToPrevPaths, errs)

// add metadata collections
pathPrefix, err := c.handler.MetadataPathPrefix(c.tenantID)
if err != nil {
Expand Down Expand Up @@ -1012,13 +1069,13 @@ func includePath(ctx context.Context, dsc dirScopeChecker, folderPath path.Path)
}

// updatePath records newPath as the location of the folder with the given id
// and rewrites the stored path of every folder under the old location so the
// whole subtree stays consistent.
//
// If the folder has no recorded path yet, newPath is simply inserted. If the
// path is unchanged, the map is left alone. Collisions (two ids mapping to the
// same path) are not resolved here; other components should take care of that.
// We do need to ensure that the resulting map contains all folders though so
// we know the next time around.
func updatePath(paths map[string]string, id, newPath string) {
	currPath := paths[id]
	if len(currPath) == 0 {
		paths[id] = newPath
		return
	}

	if currPath == newPath {
		return
	}

	// Rewrite the old-path prefix on the folder itself and every descendant.
	for folderID, p := range paths {
		if !strings.HasPrefix(p, currPath) {
			continue
		}

		paths[folderID] = strings.Replace(p, currPath, newPath, 1)
	}
}
Loading
Loading