diff --git a/src/main/java/emissary/core/IBaseDataObjectHelper.java b/src/main/java/emissary/core/IBaseDataObjectHelper.java index 9831b28bea..7a9687cdac 100644 --- a/src/main/java/emissary/core/IBaseDataObjectHelper.java +++ b/src/main/java/emissary/core/IBaseDataObjectHelper.java @@ -10,7 +10,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.security.NoSuchAlgorithmException; import java.util.Collections; import java.util.List; import java.util.Map; @@ -153,11 +152,7 @@ public static void addParentInformationToChild(final IBaseDataObject parentIBase KffDataObjectHandler.parentToChild(childIBaseDataObject); // Hash the new child data, overwrites parent hashes if any - try { - kffDataObjectHandler.hash(childIBaseDataObject, true); - } catch (NoSuchAlgorithmException | IOException e) { - // Do not add the hash parameters - } + kffDataObjectHandler.hash(childIBaseDataObject, true); } /** diff --git a/src/main/java/emissary/core/channels/SeekableByteChannelHelper.java b/src/main/java/emissary/core/channels/SeekableByteChannelHelper.java index 5ceba35feb..21a1ce94bc 100644 --- a/src/main/java/emissary/core/channels/SeekableByteChannelHelper.java +++ b/src/main/java/emissary/core/channels/SeekableByteChannelHelper.java @@ -19,6 +19,9 @@ public final class SeekableByteChannelHelper { private static final Logger logger = LoggerFactory.getLogger(SeekableByteChannelHelper.class); + /** Channel factory backed by an empty byte array. Used for situations when a BDO should have its payload discarded. */ + public static final SeekableByteChannelFactory EMPTY_CHANNEL_FACTORY = memory(new byte[0]); + private SeekableByteChannelHelper() {} /** diff --git a/src/main/java/emissary/kff/KffChain.java b/src/main/java/emissary/kff/KffChain.java index 5516647f61..9700139a8c 100755 --- a/src/main/java/emissary/kff/KffChain.java +++ b/src/main/java/emissary/kff/KffChain.java @@ -120,7 +120,7 @@ public List getAlgorithms() { * * @return result of check */ - public KffResult check(final String itemName, final byte[] content) throws Exception { + public KffResult check(final String itemName, final byte[] content) throws NoSuchAlgorithmException { final ChecksumResults sums = computeSums(content); KffResult answer = null; if (content.length < kffMinDataSize || list.isEmpty()) { diff --git a/src/main/java/emissary/kff/KffDataObjectHandler.java b/src/main/java/emissary/kff/KffDataObjectHandler.java index 5f26254916..b4ba35fad0 100755 --- a/src/main/java/emissary/kff/KffDataObjectHandler.java +++ b/src/main/java/emissary/kff/KffDataObjectHandler.java @@ -3,6 +3,7 @@ import emissary.core.IBaseDataObject; import emissary.core.IBaseDataObject.MergePolicy; import emissary.core.channels.SeekableByteChannelFactory; +import emissary.core.channels.SeekableByteChannelHelper; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -71,7 +72,7 @@ public KffDataObjectHandler(boolean truncateKnownData, boolean setFormOnKnownDat } /** - * Compute the configure hashes and return as a map Also include entries indicating the know file or duplicate file + * Compute the configured hashes and return as a map. Also include entries indicating the known file or duplicate file * status if so configured * * @param data the bytes to hash @@ -83,81 +84,88 @@ public Map hashData(byte[] data, String name) { } /** - * Compute the configure hashes and return as a map Also include entries indicating the know file or duplicate file + * Compute the configured hashes and return as a map. Also include entries indicating the known file or duplicate file * status if so configured * * @param data the bytes to hash - * @param name th name of the data (for reporting) + * @param name the name of the data (for reporting) * @param prefix prepended to hash name entries * @return parameter entries suitable for a BaseDataObject */ public Map hashData(@Nullable byte[] data, String name, @Nullable String prefix) { - Map results = new HashMap<>(); if (prefix == null) { prefix = ""; } + KffResult kffCheck = null; if (data != null && data.length > 0) { try { - KffResult kffCheck = kff.check(name, data); - - // Store all computed results in data object params - for (String alg : kffCheck.getResultNames()) { - results.put(prefix + KFF_PARAM_BASE + alg, kffCheck.getResultString(alg)); - } - - // Set params if we have a hit - if (kffCheck.isKnown()) { - results.put(prefix + KFF_PARAM_KNOWN_FILTER_NAME, kffCheck.getFilterName()); - } - if (kffCheck.isDupe()) { - results.put(prefix + KFF_PARAM_DUPE_FILTER_NAME, kffCheck.getFilterName()); - } - } catch (Exception kffex) { + kffCheck = kff.check(name, data); + } catch (NoSuchAlgorithmException kffex) { logger.warn("Unable to compute kff on " + name, kffex); } } - return results; + + return processKffResult(kffCheck, prefix); } /** - * Compute the configure hashes and return as a map Also include entries indicating the know file or duplicate file + * Compute the configured hashes and return as a map. Also include entries indicating the known file or duplicate file * status if so configured + * + * @param sbcf the data to hash + * @param name the name of the data (for reporting) + * @return parameter entries suitable for a BaseDataObject + */ + public Map hashData(final SeekableByteChannelFactory sbcf, final String name) { + return hashData(sbcf, name, ""); + } + + /** + * Compute the configured hashes and return as a map. Also include entries indicating the known file or duplicate file + * status if so configured. * * @param sbcf the data to hash - * @param name th name of the data (for reporting) + * @param name the name of the data (for reporting) * @param prefix prepended to hash name entries * @return parameter entries suitable for a BaseDataObject - * @throws IOException if the data can't be read - * @throws NoSuchAlgorithmException if the checksum can't be computed */ - public Map hashData(final SeekableByteChannelFactory sbcf, final String name, String prefix) - throws IOException, NoSuchAlgorithmException { - final Map results = new HashMap<>(); + public Map hashData(final SeekableByteChannelFactory sbcf, final String name, String prefix) { if (prefix == null) { prefix = ""; } + KffResult kffCheck = null; if (sbcf != null) { try (final SeekableByteChannel sbc = sbcf.create()) { if (sbc.size() > 0) { - final KffResult kffCheck = kff.check(name, sbcf); - - // Store all computed results in data object params - for (String alg : kffCheck.getResultNames()) { - results.put(prefix + KFF_PARAM_BASE + alg, kffCheck.getResultString(alg)); - } - - // Set params if we have a hit - if (kffCheck.isKnown()) { - results.put(prefix + KFF_PARAM_KNOWN_FILTER_NAME, kffCheck.getFilterName()); - } - if (kffCheck.isDupe()) { - results.put(prefix + KFF_PARAM_DUPE_FILTER_NAME, kffCheck.getFilterName()); - } + kffCheck = kff.check(name, sbcf); } + } catch (NoSuchAlgorithmException | IOException kffex) { + logger.warn("Unable to compute kff on " + name, kffex); + } + } + + return processKffResult(kffCheck, prefix); + } + + private static Map processKffResult(KffResult result, String prefix) { + Map results = new HashMap<>(); + + if (result != null) { + // Store all computed results in data object params + for (String alg : result.getResultNames()) { + results.put(prefix + KFF_PARAM_BASE + alg, result.getResultString(alg)); + } + + // Set params if we have a hit + if (result.isKnown()) { + results.put(prefix + KFF_PARAM_KNOWN_FILTER_NAME, result.getFilterName()); + } + if (result.isDupe()) { + results.put(prefix + KFF_PARAM_DUPE_FILTER_NAME, result.getFilterName()); } } @@ -170,11 +178,7 @@ public Map hashData(final SeekableByteChannelFactory sbcf, final * @param d the data object */ public void hash(@Nullable final IBaseDataObject d) { - try { - hash(d, false); - } catch (NoSuchAlgorithmException | IOException e) { - // Do nothing - } + hash(d, false); } /** @@ -182,10 +186,8 @@ public void hash(@Nullable final IBaseDataObject d) { * * @param d the data object * @param useSbc use the {@link SeekableByteChannel} interface - * @throws IOException if the data can't be read - * @throws NoSuchAlgorithmException if the checksum can't be computed */ - public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws NoSuchAlgorithmException, IOException { + public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) { if (d == null) { return; @@ -203,6 +205,8 @@ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws } else { return; // NOSONAR } + } catch (IOException e) { + logger.error("Couldn't hash data {}", d.shortName()); } finally { // preserve the original MD5 only if 1) we hadn't already done so and 2) rehashing produced a new MD5 value if (!d.hasParameter(MD5_ORIGINAL) && previouslyComputedMd5HasChanged(d, originalMD5)) { @@ -219,6 +223,9 @@ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws d.replaceCurrentForm(KFF_DUPE_CURRENT_FORM); } if (truncateKnownData) { + if (useSbc) { + d.setChannelFactory(SeekableByteChannelHelper.EMPTY_CHANNEL_FACTORY); + } d.setData(null); } } @@ -229,6 +236,7 @@ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws * * @param d IBaseDataObject being processed */ + @Nullable static String captureOriginalMD5BeforeRehashing(IBaseDataObject d) { // If the IBDO already has an MD5_ORIGINAL parameter, return null. if (d.hasParameter(MD5_ORIGINAL)) { diff --git a/src/main/java/emissary/place/KffHashPlace.java b/src/main/java/emissary/place/KffHashPlace.java index 74c2e6719c..c740c9244d 100644 --- a/src/main/java/emissary/place/KffHashPlace.java +++ b/src/main/java/emissary/place/KffHashPlace.java @@ -6,7 +6,6 @@ import java.io.IOException; import java.io.InputStream; -import java.security.NoSuchAlgorithmException; /** * Hashing place to hash payload unless hashes are set or skip flag is set. This place is intended to execute in the @@ -62,11 +61,7 @@ public void process(IBaseDataObject payload) throws ResourceException { return; } - try { - kff.hash(payload, useSbc); - } catch (final NoSuchAlgorithmException | IOException e) { - logger.error("KffHashPlace failed to hash data for {} - this shouldn't happen", payload.shortName(), e); - } + kff.hash(payload, useSbc); } } diff --git a/src/test/java/emissary/core/IBaseDataObjectHelperTest.java b/src/test/java/emissary/core/IBaseDataObjectHelperTest.java index aec9087289..68155ec09e 100644 --- a/src/test/java/emissary/core/IBaseDataObjectHelperTest.java +++ b/src/test/java/emissary/core/IBaseDataObjectHelperTest.java @@ -14,7 +14,6 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.nio.charset.StandardCharsets; -import java.security.NoSuchAlgorithmException; import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; @@ -314,7 +313,6 @@ void testAddParentInformationToChild() throws Exception { final IBaseDataObject childIbdo1 = new BaseDataObject(); childIbdo1.setChannelFactory(InMemoryChannelFactory.create("0123456789".getBytes(StandardCharsets.US_ASCII))); - Mockito.doThrow(NoSuchAlgorithmException.class).when(mockKffDataObjectHandler1).hash(Mockito.any(BaseDataObject.class), Mockito.anyBoolean()); IBaseDataObjectHelper.addParentInformationToChild(parentIbdo, childIbdo1, true, alwaysCopyMetadataKeys, placeKey, mockKffDataObjectHandler1); assertFalse(KffDataObjectHandler.hashPresent(childIbdo1)); @@ -351,7 +349,7 @@ void testAddParentInformationToChildren() { } @Test - void testAddParentInformationToChildExcluding() throws Exception { + void testAddParentInformationToChildExcluding() { final IBaseDataObject parentIbdo = ibdo1; final IBaseDataObject childIbdo = ibdo2; diff --git a/src/test/java/emissary/kff/KffDataObjectHandlerTest.java b/src/test/java/emissary/kff/KffDataObjectHandlerTest.java index 1248dbd78b..33286da14e 100644 --- a/src/test/java/emissary/kff/KffDataObjectHandlerTest.java +++ b/src/test/java/emissary/kff/KffDataObjectHandlerTest.java @@ -2,6 +2,9 @@ import emissary.core.DataObjectFactory; import emissary.core.IBaseDataObject; +import emissary.core.channels.AbstractSeekableByteChannel; +import emissary.core.channels.SeekableByteChannelFactory; +import emissary.core.channels.SeekableByteChannelHelper; import emissary.test.core.junit5.UnitTest; import emissary.util.io.ResourceReader; @@ -12,11 +15,15 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.HashMap; import java.util.Map; import javax.annotation.Nullable; import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; @@ -25,6 +32,7 @@ class KffDataObjectHandlerTest extends UnitTest { static final byte[] DATA = "This is a test".getBytes(); + static final SeekableByteChannelFactory SBC_DATA = SeekableByteChannelHelper.memory("This is a test".getBytes()); // echo -n "This is a test" | openssl sha1 static final String DATA_SHA1 = "a54d88e06612d820bc3be72877c74f257b561b19"; @@ -47,7 +55,6 @@ class KffDataObjectHandlerTest extends UnitTest { static final String DATA_CRC32 = "33323239323631363138"; - @Nullable protected KffDataObjectHandler kff; @Nullable @@ -56,7 +63,7 @@ class KffDataObjectHandlerTest extends UnitTest { @Override @BeforeEach - public void setUp() throws Exception { + public void setUp() { kff = new KffDataObjectHandler(); try (InputStream doc = new ResourceReader().getResourceAsStream(resource)) { byte[] data = IOUtils.toByteArray(doc); @@ -79,18 +86,33 @@ public void tearDown() throws Exception { void testMapWithEmptyPrefix() { Map m = kff.hashData(DATA, "junk"); assertNotNull(m.get(KffDataObjectHandler.KFF_PARAM_MD5), "Empty prefix returns normal values"); + + m.clear(); + + m = kff.hashData(SBC_DATA, "junk"); + assertNotNull(m.get(KffDataObjectHandler.KFF_PARAM_MD5), "Empty prefix returns normal values"); } @Test void testMapWithNullPrefix() { Map m = kff.hashData(DATA, "junk", null); assertNotNull(m.get(KffDataObjectHandler.KFF_PARAM_MD5), "Null prefix returns normal values"); + + m.clear(); + + m = kff.hashData(SBC_DATA, "junk", null); + assertNotNull(m.get(KffDataObjectHandler.KFF_PARAM_MD5), "Null prefix returns normal values"); } @Test void testMapWithPrefix() { Map m = kff.hashData(DATA, "name", "foo"); assertNotNull(m.get("foo" + KffDataObjectHandler.KFF_PARAM_MD5), "Prefix prepends on normal key names but we got " + m.keySet()); + + m.clear(); + + m = kff.hashData(SBC_DATA, "name", "foo"); + assertNotNull(m.get("foo" + KffDataObjectHandler.KFF_PARAM_MD5), "Prefix prepends on normal key names but we got " + m.keySet()); } @Test @@ -203,4 +225,99 @@ void testSetAndGetHash() { payload.deleteParameter(KffDataObjectHandler.KFF_PARAM_SHA512); assertEquals(DATA_SHA384, KffDataObjectHandler.getBestAvailableHash(payload)); } + + @Test + void testWithChannelFactory() { + kff = new KffDataObjectHandler(true, true, true); + payload.setParameter(KffDataObjectHandler.KFF_PARAM_KNOWN_FILTER_NAME, "test.filter"); + payload.setChannelFactory(SBC_DATA); + kff.hash(payload); + assertEquals("test.filter", payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_BASE + "FILTERED_BY")); + assertTrue(KffDataObjectHandler.hashPresent(payload)); + assertEquals(DATA_MD5, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_MD5)); + assertEquals(DATA_CRC32, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_BASE + "CRC32")); + assertEquals(DATA_SSDEEP, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SSDEEP)); + assertEquals(DATA_SHA1, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA1)); + assertEquals(DATA_SHA256, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA256)); + assertEquals(KffDataObjectHandler.KFF_DUPE_CURRENT_FORM, payload.getFileType()); + assertArrayEquals(new byte[0], payload.data()); + } + + @Test + void testWithEmptyChannelFactory() { + kff = new KffDataObjectHandler(true, true, true); + payload.setParameter(KffDataObjectHandler.KFF_PARAM_KNOWN_FILTER_NAME, "test.filter"); + payload.setChannelFactory(SeekableByteChannelHelper.EMPTY_CHANNEL_FACTORY); + kff.hash(payload); + assertEquals("test.filter", payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_BASE + "FILTERED_BY")); + assertFalse(KffDataObjectHandler.hashPresent(payload)); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_MD5)); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_BASE + "CRC32")); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SSDEEP)); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA1)); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA256)); + assertEquals("test", payload.getFileType()); + assertArrayEquals(new byte[0], payload.data()); + assertEquals(SeekableByteChannelHelper.EMPTY_CHANNEL_FACTORY, payload.getChannelFactory()); + } + + @Test + void testNullPayload() { + assertDoesNotThrow(() -> kff.hash(null)); + } + + @Test + void testRemovingHash() { + final SeekableByteChannelFactory exceptionSbcf = () -> new AbstractSeekableByteChannel() { + @Override + protected void closeImpl() { + // Do nothing + } + + @Override + protected int readImpl(ByteBuffer byteBuffer) throws IOException { + throw new IOException("Test exception"); + } + + @Override + protected long sizeImpl() throws IOException { + throw new IOException("Test exception"); + } + }; + + payload.setChannelFactory(exceptionSbcf); + kff.hash(payload); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_BASE + "FILTERED_BY")); + assertFalse(KffDataObjectHandler.hashPresent(payload)); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_MD5)); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_BASE + "CRC32")); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SSDEEP)); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA1)); + assertNull(payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA256)); + assertNotEquals(KffDataObjectHandler.KFF_DUPE_CURRENT_FORM, payload.getFileType()); + + payload.setParameter(KffDataObjectHandler.KFF_PARAM_KNOWN_FILTER_NAME, "test.filter"); + payload.setChannelFactory(SBC_DATA); + kff.hash(payload); + assertEquals("test.filter", payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_BASE + "FILTERED_BY")); + assertTrue(KffDataObjectHandler.hashPresent(payload)); + assertEquals(DATA_MD5, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_MD5)); + assertEquals(DATA_CRC32, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_BASE + "CRC32")); + assertEquals(DATA_SSDEEP, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SSDEEP)); + assertEquals(DATA_SHA1, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA1)); + assertEquals(DATA_SHA256, payload.getStringParameter(KffDataObjectHandler.KFF_PARAM_SHA256)); + assertEquals(KffDataObjectHandler.KFF_DUPE_CURRENT_FORM, payload.getFileType()); + + } + + @Test + void testNullHashData() { + assertEquals(new HashMap<>(), kff.hashData((SeekableByteChannelFactory) null, null)); + } + + @Test + void testEmptySbcf() { + assertEquals(new HashMap<>(), kff.hashData(SeekableByteChannelHelper.EMPTY_CHANNEL_FACTORY, null)); + } + }