diff --git a/src/main/java/emissary/kff/KffDataObjectHandler.java b/src/main/java/emissary/kff/KffDataObjectHandler.java index 7878ac3546..4e90a11b0a 100755 --- a/src/main/java/emissary/kff/KffDataObjectHandler.java +++ b/src/main/java/emissary/kff/KffDataObjectHandler.java @@ -4,6 +4,7 @@ import emissary.core.IBaseDataObject.MergePolicy; import emissary.core.channels.SeekableByteChannelFactory; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,6 +33,7 @@ public class KffDataObjectHandler { public static final String KFF_PARAM_KNOWN_FILTER_NAME = KFF_PARAM_BASE + "FILTERED_BY"; public static final String KFF_PARAM_DUPE_FILTER_NAME = KFF_PARAM_BASE + "KNOWN_BY"; public static final String KFF_DUPE_CURRENT_FORM = "KNOWN_FILE"; + public static final String MD5_ORIGINAL = "MD5_ORIGINAL"; // Our kff impl protected KffChain kff = KffChainLoader.getChainInstance(); @@ -185,6 +187,7 @@ public void hash(@Nullable final IBaseDataObject d) { */ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws NoSuchAlgorithmException, IOException { if (d != null) { + preserveOriginalMD5BeforeRehashing(d); removeHash(d); } @@ -215,6 +218,31 @@ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws } } + /** + * Preserve the MD5 checksum value with key MD5_ORIGINAL. If the IBDO already has a parameter with that key, do not + * overwrite it. + * + * @param d IBaseDataObject being processed + */ + static void preserveOriginalMD5BeforeRehashing(IBaseDataObject d) { + // If the IBDO already has an MD5_ORIGINAL parameter, do not overwrite it. + if (d.hasParameter(MD5_ORIGINAL)) { + return; + } + + if (d.hasParameter(KFF_PARAM_MD5)) { + var paramValue = d.getParameter(KFF_PARAM_MD5); + if (!paramValue.isEmpty() && paramValue.get(0) != null) { + String originalMD5 = paramValue.get(0).toString(); + + // only preserve the KFF_PARAM_MD5 value if it's not blank + if (StringUtils.isNotBlank(originalMD5)) { + d.setParameter(MD5_ORIGINAL, originalMD5); + } + } + } + } + /** * Parent info has been copied in and must be reset for the child context * diff --git a/src/test/java/emissary/kff/KffDataObjectHandlerTest.java b/src/test/java/emissary/kff/KffDataObjectHandlerTest.java index af27fd3bae..9e121f46db 100644 --- a/src/test/java/emissary/kff/KffDataObjectHandlerTest.java +++ b/src/test/java/emissary/kff/KffDataObjectHandlerTest.java @@ -115,6 +115,9 @@ void testHashMethodCalledTwice() { payload.setParameter(KffDataObjectHandler.KFF_PARAM_KNOWN_FILTER_NAME, "test.filter"); kff.hash(payload); + assertNull(payload.getStringParameter(KffDataObjectHandler.MD5_ORIGINAL), + "MD5_ORIGINAL should only be populated if hashing more than once"); + // hash again, to see the effect on the hash-related params. // none of the parameters should have a duplicated value @@ -127,6 +130,10 @@ void testHashMethodCalledTwice() { assertEquals(DATA_SHA1, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA1)); assertEquals(DATA_SHA256, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA256)); assertEquals(KffDataObjectHandler.KFF_DUPE_CURRENT_FORM, payload.getFileType()); + assertNotNull(payload.getStringParameter(KffDataObjectHandler.MD5_ORIGINAL), + "MD5_ORIGINAL should be populated if hash called more than once"); + assertEquals(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_MD5), + payload.getStringParameter(KffDataObjectHandler.MD5_ORIGINAL)); } @Test