Skip to content

Commit

Permalink
update kff hash exception handling and SeekableByteChannel interactions
Browse files Browse the repository at this point in the history
  • Loading branch information
jpdahlke committed Sep 14, 2024
1 parent 4d84b8d commit 3e75d78
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 67 deletions.
7 changes: 1 addition & 6 deletions src/main/java/emissary/core/IBaseDataObjectHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.security.NoSuchAlgorithmException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -153,11 +152,7 @@ public static void addParentInformationToChild(final IBaseDataObject parentIBase
KffDataObjectHandler.parentToChild(childIBaseDataObject);

// Hash the new child data, overwrites parent hashes if any
try {
kffDataObjectHandler.hash(childIBaseDataObject, true);
} catch (NoSuchAlgorithmException | IOException e) {
// Do not add the hash parameters
}
kffDataObjectHandler.hash(childIBaseDataObject, true);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
public final class SeekableByteChannelHelper {
private static final Logger logger = LoggerFactory.getLogger(SeekableByteChannelHelper.class);

/** Channel factory backed by an empty byte array. Used for situations when a BDO should have its payload discarded. */
public static final SeekableByteChannelFactory EMPTY_CHANNEL_FACTORY = memory(new byte[0]);

private SeekableByteChannelHelper() {}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/emissary/kff/KffChain.java
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ public List<String> getAlgorithms() {
*
* @return result of check
*/
public KffResult check(final String itemName, final byte[] content) throws Exception {
public KffResult check(final String itemName, final byte[] content) throws NoSuchAlgorithmException {
final ChecksumResults sums = computeSums(content);
KffResult answer = null;
if (content.length < kffMinDataSize || list.isEmpty()) {
Expand Down
106 changes: 57 additions & 49 deletions src/main/java/emissary/kff/KffDataObjectHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import emissary.core.IBaseDataObject;
import emissary.core.IBaseDataObject.MergePolicy;
import emissary.core.channels.SeekableByteChannelFactory;
import emissary.core.channels.SeekableByteChannelHelper;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
Expand Down Expand Up @@ -71,7 +72,7 @@ public KffDataObjectHandler(boolean truncateKnownData, boolean setFormOnKnownDat
}

/**
* Compute the configure hashes and return as a map Also include entries indicating the know file or duplicate file
* Compute the configured hashes and return as a map. Also include entries indicating the known file or duplicate file
* status if so configured
*
* @param data the bytes to hash
Expand All @@ -83,81 +84,88 @@ public Map<String, String> hashData(byte[] data, String name) {
}

/**
 * Compute the configured hashes and return as a map. Also include entries indicating the known file or duplicate file
 * status if so configured.
 * <p>
 * Null or empty data is never passed to the checker. A {@link NoSuchAlgorithmException} raised by the check is logged at
 * warn level rather than propagated; in both cases no hash entries are produced.
 *
 * @param data the bytes to hash, may be null
 * @param name the name of the data (for reporting)
 * @param prefix prepended to hash name entries, null is treated as the empty string
 * @return parameter entries suitable for a BaseDataObject
 */
public Map<String, String> hashData(@Nullable byte[] data, String name, @Nullable String prefix) {
    // Normalise once so key construction never concatenates the text "null"
    final String keyPrefix = prefix == null ? "" : prefix;

    // Stays null when there is nothing to hash or the check fails; processKffResult adds no entries for null
    KffResult kffCheck = null;
    if (data != null && data.length > 0) {
        try {
            kffCheck = kff.check(name, data);
        } catch (NoSuchAlgorithmException kffex) {
            logger.warn("Unable to compute kff on " + name, kffex);
        }
    }

    return processKffResult(kffCheck, keyPrefix);
}

/**
 * Hash the supplied channel-backed data with the configured algorithms, using no prefix on the resulting entry names.
 * Entries indicating the known file or duplicate file status are included if so configured.
 *
 * @param sbcf the data to hash
 * @param name the name of the data (for reporting)
 * @return parameter entries suitable for a BaseDataObject
 */
public Map<String, String> hashData(final SeekableByteChannelFactory sbcf, final String name) {
    // Same work as the three-argument overload, with an empty prefix
    final String noPrefix = "";
    return hashData(sbcf, name, noPrefix);
}

/**
* Compute the configured hashes and return as a map. Also include entries indicating the known file or duplicate file
* status if so configured.
*
* @param sbcf the data to hash
* @param name th name of the data (for reporting)
* @param name the name of the data (for reporting)
* @param prefix prepended to hash name entries
* @return parameter entries suitable for a BaseDataObject
* @throws IOException if the data can't be read
* @throws NoSuchAlgorithmException if the checksum can't be computed
*/
public Map<String, String> hashData(final SeekableByteChannelFactory sbcf, final String name, String prefix)
throws IOException, NoSuchAlgorithmException {
final Map<String, String> results = new HashMap<>();
public Map<String, String> hashData(final SeekableByteChannelFactory sbcf, final String name, String prefix) {

if (prefix == null) {
prefix = "";
}

KffResult kffCheck = null;
if (sbcf != null) {
try (final SeekableByteChannel sbc = sbcf.create()) {
if (sbc.size() > 0) {
final KffResult kffCheck = kff.check(name, sbcf);

// Store all computed results in data object params
for (String alg : kffCheck.getResultNames()) {
results.put(prefix + KFF_PARAM_BASE + alg, kffCheck.getResultString(alg));
}

// Set params if we have a hit
if (kffCheck.isKnown()) {
results.put(prefix + KFF_PARAM_KNOWN_FILTER_NAME, kffCheck.getFilterName());
}
if (kffCheck.isDupe()) {
results.put(prefix + KFF_PARAM_DUPE_FILTER_NAME, kffCheck.getFilterName());
}
kffCheck = kff.check(name, sbcf);
}
} catch (NoSuchAlgorithmException | IOException kffex) {
logger.warn("Unable to compute kff on " + name, kffex);
}
}

return processKffResult(kffCheck, prefix);
}

private static Map<String, String> processKffResult(KffResult result, String prefix) {
Map<String, String> results = new HashMap<>();

if (result != null) {
// Store all computed results in data object params
for (String alg : result.getResultNames()) {
results.put(prefix + KFF_PARAM_BASE + alg, result.getResultString(alg));
}

// Set params if we have a hit
if (result.isKnown()) {
results.put(prefix + KFF_PARAM_KNOWN_FILTER_NAME, result.getFilterName());
}
if (result.isDupe()) {
results.put(prefix + KFF_PARAM_DUPE_FILTER_NAME, result.getFilterName());
}
}

Expand All @@ -170,22 +178,16 @@ public Map<String, String> hashData(final SeekableByteChannelFactory sbcf, final
* @param d the data object
*/
public void hash(@Nullable final IBaseDataObject d) {
    // Delegates with useSbc=false. The two-argument overload declares no checked exceptions,
    // so no try/catch is needed here and nothing is silently swallowed.
    hash(d, false);
}

/**
* Compute the hash of a data object's data
*
* @param d the data object
* @param useSbc use the {@link SeekableByteChannel} interface
* @throws IOException if the data can't be read
* @throws NoSuchAlgorithmException if the checksum can't be computed
*/
public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws NoSuchAlgorithmException, IOException {
public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) {

if (d == null) {
return;
Expand All @@ -203,6 +205,8 @@ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws
} else {
return; // NOSONAR
}
} catch (IOException e) {
logger.error("Couldn't hash data {}", d.shortName());
} finally {
// preserve the original MD5 only if 1) we hadn't already done so and 2) rehashing produced a new MD5 value
if (!d.hasParameter(MD5_ORIGINAL) && previouslyComputedMd5HasChanged(d, originalMD5)) {
Expand All @@ -219,6 +223,9 @@ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws
d.replaceCurrentForm(KFF_DUPE_CURRENT_FORM);
}
if (truncateKnownData) {
if (useSbc) {
d.setChannelFactory(SeekableByteChannelHelper.EMPTY_CHANNEL_FACTORY);
}
d.setData(null);
}
}
Expand All @@ -229,6 +236,7 @@ public void hash(@Nullable final IBaseDataObject d, final boolean useSbc) throws
*
* @param d IBaseDataObject being processed
*/
@Nullable
static String captureOriginalMD5BeforeRehashing(IBaseDataObject d) {
// If the IBDO already has an MD5_ORIGINAL parameter, return null.
if (d.hasParameter(MD5_ORIGINAL)) {
Expand Down
7 changes: 1 addition & 6 deletions src/main/java/emissary/place/KffHashPlace.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import java.io.IOException;
import java.io.InputStream;
import java.security.NoSuchAlgorithmException;

/**
* Hashing place to hash payload unless hashes are set or skip flag is set. This place is intended to execute in the
Expand Down Expand Up @@ -62,11 +61,7 @@ public void process(IBaseDataObject payload) throws ResourceException {
return;
}

try {
kff.hash(payload, useSbc);
} catch (final NoSuchAlgorithmException | IOException e) {
logger.error("KffHashPlace failed to hash data for {} - this shouldn't happen", payload.shortName(), e);
}
kff.hash(payload, useSbc);
}

}
4 changes: 1 addition & 3 deletions src/test/java/emissary/core/IBaseDataObjectHelperTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.charset.StandardCharsets;
import java.security.NoSuchAlgorithmException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -314,7 +313,6 @@ void testAddParentInformationToChild() throws Exception {
final IBaseDataObject childIbdo1 = new BaseDataObject();

childIbdo1.setChannelFactory(InMemoryChannelFactory.create("0123456789".getBytes(StandardCharsets.US_ASCII)));
Mockito.doThrow(NoSuchAlgorithmException.class).when(mockKffDataObjectHandler1).hash(Mockito.any(BaseDataObject.class), Mockito.anyBoolean());
IBaseDataObjectHelper.addParentInformationToChild(parentIbdo, childIbdo1,
true, alwaysCopyMetadataKeys, placeKey, mockKffDataObjectHandler1);
assertFalse(KffDataObjectHandler.hashPresent(childIbdo1));
Expand Down Expand Up @@ -351,7 +349,7 @@ void testAddParentInformationToChildren() {
}

@Test
void testAddParentInformationToChildExcluding() throws Exception {
void testAddParentInformationToChildExcluding() {
final IBaseDataObject parentIbdo = ibdo1;
final IBaseDataObject childIbdo = ibdo2;

Expand Down
Loading

0 comments on commit 3e75d78

Please sign in to comment.