Skip to content

Commit

Permalink
Preserving the Original MD5 if the payload is hashed more than once
Browse files Browse the repository at this point in the history
  • Loading branch information
drivenflywheel committed Aug 19, 2024
1 parent 5d8d052 commit 8d38448
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 0 deletions.
28 changes: 28 additions & 0 deletions src/main/java/emissary/kff/KffDataObjectHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import emissary.core.IBaseDataObject.MergePolicy;
import emissary.core.channels.SeekableByteChannelFactory;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -32,6 +33,7 @@ public class KffDataObjectHandler {
public static final String KFF_PARAM_KNOWN_FILTER_NAME = KFF_PARAM_BASE + "FILTERED_BY";
public static final String KFF_PARAM_DUPE_FILTER_NAME = KFF_PARAM_BASE + "KNOWN_BY";
public static final String KFF_DUPE_CURRENT_FORM = "KNOWN_FILE";
/** Parameter key under which an already-present MD5 value is saved before the payload is hashed again. */
public static final String MD5_ORIGINAL = "MD5_ORIGINAL";

// Our kff impl
protected KffChain kff = KffChainLoader.getChainInstance();
Expand Down Expand Up @@ -185,6 +187,7 @@ public void hash(@Nullable final IBaseDataObject d) {
*/
public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws NoSuchAlgorithmException, IOException {
if (d != null) {
preserveOriginalMD5BeforeRehashing(d);
removeHash(d);
}

Expand Down Expand Up @@ -215,6 +218,31 @@ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws
}
}

/**
 * Copies the payload's current MD5 parameter value into the {@code MD5_ORIGINAL} parameter.
 * <p>
 * Nothing is written when {@code MD5_ORIGINAL} is already present, when the MD5 parameter is absent or has no
 * values, or when its first value is {@code null} or blank. Only the first MD5 value is considered.
 *
 * @param d IBaseDataObject being processed; must not be {@code null}
 */
static void preserveOriginalMD5BeforeRehashing(IBaseDataObject d) {
    // An earlier preserved value always wins; with no current MD5 there is nothing to copy.
    if (d.hasParameter(MD5_ORIGINAL) || !d.hasParameter(KFF_PARAM_MD5)) {
        return;
    }

    var md5Values = d.getParameter(KFF_PARAM_MD5);
    if (md5Values.isEmpty() || md5Values.get(0) == null) {
        return;
    }

    // Blank values are not worth preserving.
    String firstMd5 = md5Values.get(0).toString();
    if (StringUtils.isNotBlank(firstMd5)) {
        d.setParameter(MD5_ORIGINAL, firstMd5);
    }
}

/**
* Parent info has been copied in and must be reset for the child context
*
Expand Down
7 changes: 7 additions & 0 deletions src/test/java/emissary/kff/KffDataObjectHandlerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ void testHashMethodCalledTwice() {
payload.setParameter(KffDataObjectHandler.KFF_PARAM_KNOWN_FILTER_NAME, "test.filter");
kff.hash(payload);

assertNull(payload.getStringParameter(KffDataObjectHandler.MD5_ORIGINAL),
"MD5_ORIGINAL should only be populated if hashing more than once");

// hash again, to see the effect on the hash-related params.
// none of the parameters should have a duplicated value

Expand All @@ -127,6 +130,10 @@ void testHashMethodCalledTwice() {
assertEquals(DATA_SHA1, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA1));
assertEquals(DATA_SHA256, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA256));
assertEquals(KffDataObjectHandler.KFF_DUPE_CURRENT_FORM, payload.getFileType());
assertNotNull(payload.getStringParameter(KffDataObjectHandler.MD5_ORIGINAL),
"MD5_ORIGINAL should be populated if hash called more than once");
assertEquals(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_MD5),
payload.getStringParameter(KffDataObjectHandler.MD5_ORIGINAL));
}

@Test
Expand Down

0 comments on commit 8d38448

Please sign in to comment.