Skip to content

Commit

Permalink
Properly handle archive
Browse files Browse the repository at this point in the history
Issue: BB-590
  • Loading branch information
francoisferrand committed Dec 20, 2024
1 parent 769a387 commit f5995a2
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 150 deletions.
215 changes: 109 additions & 106 deletions extensions/mongoProcessor/MongoQueueProcessor.js
Original file line number Diff line number Diff line change
Expand Up @@ -195,33 +195,23 @@ class MongoQueueProcessor {
], done);
}

_getZenkoObjectMetadata(log, entry, bucketInfo, done) {
// NOTE: This is used for updating replication info, as well as validating the
// `x-amz-meta-scal-version-id` header. If the Zenko bucket does not have repInfo set and
// the header is not set, then we can ignore fetching
const bucketRepInfo = bucketInfo.getReplicationConfiguration();
// KO for DeleteOpQueueEntry : no such field (and no metadata...)
const scalVersionId = entry.getValue ? entry.getValue()['x-amz-meta-scal-version-id'] : undefined;
if (!(entry instanceof DeleteOpQueueEntry) &&
!scalVersionId &&
(!bucketRepInfo || !bucketRepInfo.rules || !bucketRepInfo.rules[0].enabled)) {
return done();
}

/**
* Retrieve Zenko object metadata from MongoDB
* @param {Logger} log The logger object
* @param {ObjectQueueEntry|DeleteOpQueueEntry} entry The entry being processed
* @param {string} versionId The version id of the object
* @param {function} done The callback function
* @returns {undefined}
*/
_getZenkoObjectMetadata(log, entry, versionId, done) {
const bucket = entry.getBucket();
const key = entry.getObjectKey();
const params = {};

// Use x-amz-meta-scal-version-id if provided, instead of the actual versionId of the object.
// This should happen only for restored objects : in all other situations, both the source
// and ingested objects should have the same version id (and not x-amz-meta-scal-version-id
// metadata).
const versionId = VersionID.decode(scalVersionId) || entry.getVersionId();

// master keys with a 'null' version id coming from
// a versioning suspended bucket are considered a version
// we should not specify the version id in this case
if (versionId && !entry.getIsNull()) {
if (versionId && !(entry.getIsNull && entry.getIsNull())) {
params.versionId = versionId;
}

Expand Down Expand Up @@ -411,35 +401,59 @@ class MongoQueueProcessor {
const key = sourceEntry.getObjectKey();
const versionId = extractVersionId(sourceEntry.getObjectVersionedKey());

const options = versionId ? { versionId } : undefined;

// Calling deleteObject with undefined options to use deleteObjectNoVer which is used for
// deleting non versioned objects that only have master keys.
// When deleting a versioned object however we supply the version id in the options, which
// causes the function to call the deleteObjectVer function that is used to handle objects that
// have both master and version keys. This handles the deletion of both the version and the master
// keys in the case where no other version is available, or deleting the version and updating the
// master key otherwise.
return this._mongoClient.deleteObject(bucket, key, options, log,
err => {
if (err) {
this._normalizePendingMetric(location);
log.end().error('error deleting object metadata ' +
'from mongo', {
bucket,
key,
error: err.message,
this.logger.debug('processing object delete', { bucket, key, versionId });

async.waterfall([
cb => this._getZenkoObjectMetadata(log, sourceEntry, versionId, cb),
(zenkoObjMd, cb) => {
if (zenkoObjMd.dataStoreName !== location) {
log.end().info('ignore delete entry, transitioned to another location', {
entry: sourceEntry.getLogInfo(),
location,
});
return done(err);
return done();
}
this._produceMetricCompletionEntry(location);
log.end().info('object metadata deleted from mongo', {

return cb();
},
cb => {
// Calling deleteObject with undefined options to use deleteObjectNoVer which is used for
// deleting non versioned objects that only have master keys.
// When deleting a versioned object however we supply the version id in the options, which
// causes the function to call the deleteObjectVer function that is used to handle objects that
// have both master and version keys. This handles the deletion of both the version and the master
// keys in the case where no other version is available, or deleting the version and updating the
// master key otherwise.
const options = versionId ? { versionId } : undefined;

return this._mongoClient.deleteObject(bucket, key, options, log, cb);
},
], err => {
if (err?.is.NoSuchKey) {
log.end().info('skipping delete entry', {
entry: sourceEntry.getLogInfo(),
location,
});
return done();
}
if (err) {
this._normalizePendingMetric(location);
log.end().error('error deleting object metadata ' +
'from mongo', {
bucket,
key,
error: err.message,
location,
});
return done(err);
}
this._produceMetricCompletionEntry(location);
log.end().info('object metadata deleted from mongo', {
entry: sourceEntry.getLogInfo(),
location,
});
return done();
});
}

/**
Expand All @@ -454,14 +468,29 @@ class MongoQueueProcessor {
_processObjectQueueEntry(log, sourceEntry, location, bucketInfo, done) {
const bucket = sourceEntry.getBucket();
const key = sourceEntry.getObjectKey();
const scalVersionId = sourceEntry.getValue()['x-amz-meta-scal-version-id'];

this.logger.info('processing object metadata', {
bucket, key, scalVersionId: sourceEntry.getValue()['x-amz-meta-scal-version-id'],
});
this.logger.debug('processing object metadata', { bucket, key, scalVersionId });

this._getZenkoObjectMetadata(log, sourceEntry, bucketInfo,
(err, zenkoObjMd) => {
if (err) {
const maybeGetZenkoObjectMetadata = cb => {
// NOTE: ZenkoObjMD is used for updating replication info, as well as validating the
// `x-amz-meta-scal-version-id` header of restored objects. If the Zenko bucket does
// not have repInfo set and the header is not set, then we can skip fetching.
const bucketRepInfo = bucketInfo.getReplicationConfiguration();
if (!scalVersionId && !bucketRepInfo?.rules?.some(r => r.enabled)) {
return cb();
}

// Use x-amz-meta-scal-version-id if provided, instead of the actual versionId of the object.
// This should happen only for restored objects : in all other situations, both the source
// and ingested objects should have the same version id (and not x-amz-meta-scal-version-id
// metadata).
const versionId = scalVersionId ? VersionID.decode(scalVersionId) : sourceEntry.getVersionId();
return this._getZenkoObjectMetadata(log, sourceEntry, versionId, cb);
};

maybeGetZenkoObjectMetadata((err, zenkoObjMd) => {
if (err && !err.NoSuchKey) {
this._normalizePendingMetric(location);
log.end().error('error processing object queue entry', {
method: 'MongoQueueProcessor._processObjectQueueEntry',
Expand All @@ -471,6 +500,35 @@ class MongoQueueProcessor {
return done(err);
}

// If the object has `x-amz-meta-scal-version-id`, we need to use it instead of the id.
// This should only happen for objects restored onto the OOB location, and the location
// should match in that case
if (scalVersionId) {
if (!zenkoObjMd) {
this.logger.warn('missing source entry, ignoring x-amz-meta-scal-version-id', {
method: 'MongoQueueProcessor._processObjectQueueEntry',
location,
});
} else if (zenkoObjMd.location[0]?.dataStoreVersionId !== sourceEntry.getVersionId()) {
this.logger.warn('mismatched source entry, ignoring x-amz-meta-scal-version-id', {
method: 'MongoQueueProcessor._processObjectQueueEntry',
location,
});
} else {
this.logger.info('restored oob object', {
bucket, key, scalVersionId, zenkoObjMd, sourceEntry
});

sourceEntry.setVersionId(scalVersionId);

// TODO: do we need to update the (mongo) metadata in that case???
// - This may happen if object is re-tagged while restored...
// - Need to cleanup scal version id: delete objVal['x-amz-meta-scal-version-id'];
// - Need to keep the archive & restore fields in the metadata
return done();
}
}

const content = getContentType(sourceEntry, zenkoObjMd);
if (content.length === 0) {
this._normalizePendingMetric(location);
Expand Down Expand Up @@ -498,33 +556,6 @@ class MongoQueueProcessor {
const objVal = sourceEntry.getValue();
const params = {};

// If the object has `x-amz-meta-scal-version-id`, we need to use it instead of the id.
// This should only happen for objects restored onto the OOB location, and the location
// should match in that case
const scalVersionId = objVal['x-amz-meta-scal-version-id'];
if (scalVersionId) {

this.logger.info('restored oob object', {
bucket, key, scalVersionId, zenkoObjMd, sourceEntry
});

if (!zenkoObjMd) {
this.logger.warn('missing source entry, ignoring x-amz-meta-scal-version-id', {
method: 'MongoQueueProcessor._processObjectQueueEntry',
location,
});
} else if (zenkoObjMd.location[0]?.dataStoreVersionId !== sourceEntry.getVersionId()) {
this.logger.warn('mismatched source entry, ignoring x-amz-meta-scal-version-id', {
method: 'MongoQueueProcessor._processObjectQueueEntry',
location,
});
} else {
sourceEntry.setVersionId(zenkoObjMd.versionId);
delete objVal['x-amz-meta-scal-version-id'];
delete objVal['x-amz-meta-scal-restore-info'];
}
}

// Versioning suspended entries will have a version id but also a isNull tag.
// These master keys are considered a version and do not have a duplicate version,
// we don't specify the version id and repairMaster in this case
Expand Down Expand Up @@ -654,7 +685,7 @@ class MongoQueueProcessor {
const log = this.logger.newRequestLogger();
const sourceEntry = QueueEntry.createFromKafkaEntry(kafkaEntry);

this.logger.info('processing kafka entry', { sourceEntry });
this.logger.trace('processing kafka entry', { sourceEntry });

if (sourceEntry.error) {
log.end().error('error processing source entry',
Expand All @@ -671,37 +702,9 @@ class MongoQueueProcessor {
const location = bucketInfo.getLocationConstraint();

if (sourceEntry instanceof DeleteOpQueueEntry) {
return this._getZenkoObjectMetadata(log, sourceEntry, bucketInfo, (err, zenkoObjMd) => {
if (err) {
this._normalizePendingMetric(location);
return done(err);
}

// TODO: use getReducedLocation() ? or x-amz-storage-class ? or dataStoreName ?
// * x-amz-storage-class --> will always indicate the "cold" location
// * dataStoreName will be cold when archived, and "oob" when restored
// ==> we update the data store name (and x-amz-storage-class) before actually
// removing the data so we should look at this: so we can ignore when there is
// a match (e.g. lifecycle gc) and process the event when the user deletes
// the (restored) object
//
// TODO: two cases for restored object to test
// - expiring restored object from Sorbet --> delete from Zenko, should work fine...
// - delete made on the ring (user deletes the object) --> need to be propagated to Zenko!
if (zenkoObjMd?.dataStoreName !== location) {
log.end().info('skipping delete entry with location mismatch', {
entry: sourceEntry.getLogInfo(),
location,
zenkoLocation: zenkoObjMd.location,
});
this._normalizePendingMetric(location);
return done();
}

return this._processDeleteOpQueueEntry(log, sourceEntry, location, err => {
this._handleMetrics(sourceEntry, !!err);
return done(err);
});
return this._processDeleteOpQueueEntry(log, sourceEntry, location, err => {
this._handleMetrics(sourceEntry, !!err);
return done(err);
});
}

Expand Down
Loading

0 comments on commit f5995a2

Please sign in to comment.