Skip to content

Commit

Permalink
Using FILE_ABSOLUTEPATH and Original-Filename to populate FILEXT (#600)
Browse files Browse the repository at this point in the history
  • Loading branch information
arp-0984 authored Dec 27, 2023
1 parent be125a0 commit e8c0768
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 18 deletions.
2 changes: 2 additions & 0 deletions src/main/java/emissary/core/constants/Parameters.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ public class Parameters {
// Common parameters
public static final String DOCUMENT_TITLE = "DocumentTitle";
public static final String EVENT_DATE = "EventDate";
public static final String FILEXT = "FILEXT";
public static final String FILE_ABSOLUTEPATH = "FILE_ABSOLUTEPATH";
public static final String FILE_DATE = "FILE_DATE";
public static final String FILE_NAME = "FILE_NAME";
public static final String INPUT_FILEDATE = "INPUT_FILEDATE";
Expand Down
81 changes: 64 additions & 17 deletions src/main/java/emissary/output/DropOffUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import emissary.util.TimeUtil;
import emissary.util.shell.Executrix;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -38,6 +39,8 @@
import static emissary.core.Form.PREFIXES_LANG;
import static emissary.core.Form.TEXT;
import static emissary.core.Form.UNKNOWN;
import static emissary.core.constants.Parameters.FILEXT;
import static emissary.core.constants.Parameters.FILE_ABSOLUTEPATH;
import static emissary.core.constants.Parameters.ORIGINAL_FILENAME;

public class DropOffUtil {
Expand Down Expand Up @@ -145,6 +148,7 @@ protected void configure(final Configurator configG) {
if (this.maxFilextLen < 0) {
this.maxFilextLen = Integer.MAX_VALUE;
}

} else {
logger.debug("Configuration is null for DropOffUtil, using defaults");
this.executrix = new Executrix();
Expand Down Expand Up @@ -895,32 +899,75 @@ public Date extractEventDateFrom(final IBaseDataObject d, final boolean lastReso
}

/**
* Extracts from the provided {@link IBaseDataObject} the last file extension from each value in the "Original-Filename"
* parameter, if that value contains "." before its last character. If one or more file extensions are extracted, the
* IBaseDataObject's "FILEXT" parameter is set as the unique set of extracted file extensions, converted to lowercase.
* Utilizes the static methods getFullFilepathsFromParams and getFileExtensions to extract the file extensions from all
* the filenames of the object of a given {@link IBaseDataObject}. If one or more file extensions are extracted, the
* IBaseDataObject's FILEXT parameter is set as the unique set of extracted file extensions, converted to lowercase.
*
* @param p IBaseDataObject to process
*
*/
void extractUniqueFileExtensions(IBaseDataObject p) {
if (p.hasParameter(ORIGINAL_FILENAME)) {
final Set<String> extensions = new HashSet<>();
for (Object filename : p.getParameter(ORIGINAL_FILENAME)) {
final String fn = (String) filename;
if (StringUtils.isNotEmpty(fn) && fn.lastIndexOf('.') > -1) {
final int pos = fn.lastIndexOf('.') + 1;
if (pos < fn.length()) {
final String fext = fn.substring(pos).toLowerCase();
if (fext.length() > 0 && fext.length() <= this.maxFilextLen) {
extensions.add(fext);
}
List<String> filenames = getFullFilepathsFromParams(p);
Set<String> extensions = getFileExtensions(filenames, this.maxFilextLen);
if (!extensions.isEmpty()) {
p.setParameter(FILEXT, extensions);
}
}

/**
 * Given a list of filenames, extract and return a set of non-blank file extensions converted to lowercase.
 * <p>
 * The extension of each filename is obtained with {@code FilenameUtils.getExtension}. Extensions longer than
 * {@code maxFilextLen} are skipped. Lowercasing uses {@code Locale.ROOT} so the result does not depend on the
 * default locale of the JVM.
 *
 * @param filenames The list of filenames to examine; a {@code null} list is treated as empty
 * @param maxFilextLen The maximum size we want a file extension to be (inclusive)
 * @return A set of unique file extensions from the filename list, never {@code null}
 */
public static Set<String> getFileExtensions(List<String> filenames, int maxFilextLen) {
    final Set<String> extensions = new HashSet<>();
    if (filenames == null) {
        return extensions;
    }
    for (String filename : filenames) {
        // add the file extension if it is no longer than maxFilextLen
        final String fext = FilenameUtils.getExtension(filename);
        if (StringUtils.isNotBlank(fext) && fext.length() <= maxFilextLen) {
            // locale-independent case folding, e.g. "GIF" must not become a dotless-i form under a Turkish locale
            extensions.add(fext.toLowerCase(java.util.Locale.ROOT));
        }
    }
    return extensions;
}

/**
 * Looks up the filenames of the object in the Original-Filename and FILE_ABSOLUTEPATH parameters, in that order.
 * Delegates to {@link #getFullFilepathsFromParams(IBaseDataObject, String[])} with those two field names.
 *
 * @param d The IBDO
 * @return The list of filenames found in the field Original-Filename or FILE_ABSOLUTEPATH, empty if none were found
 */
public static List<String> getFullFilepathsFromParams(IBaseDataObject d) {
    final String[] defaultFilenameFields = {ORIGINAL_FILENAME, FILE_ABSOLUTEPATH};
    return getFullFilepathsFromParams(d, defaultFilenameFields);
}

/**
 * Uses the specified list of fields to check for filenames of the object. Returns a list with the non-blank strings
 * found in these fields, in the order the fields are given. If nothing is found in any of the fields, an empty list
 * is returned.
 *
 * @param d The IBDO
 * @param filenameFields The list of fields on the IBDO to check
 * @return The list of filenames found in the list of fields on the IBDO
 */
public static List<String> getFullFilepathsFromParams(IBaseDataObject d, String[] filenameFields) {

    List<String> filenames = new ArrayList<>();

    for (String ibdoField : filenameFields) {
        if (d.hasParameter(ibdoField)) {
            for (Object filename : d.getParameter(ibdoField)) {
                // parameter values are expected to be Strings here; blank values are ignored
                String stringFileName = (String) filename;
                if (StringUtils.isNotBlank(stringFileName)) {
                    filenames.add(stringFileName);
                }
            }
        }
    }
    return filenames;
}

/**
Expand Down
67 changes: 66 additions & 1 deletion src/test/java/emissary/output/DropOffUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

import static emissary.core.Form.TEXT;
import static emissary.core.constants.Parameters.EVENT_DATE;
import static emissary.core.constants.Parameters.FILEXT;
import static emissary.core.constants.Parameters.FILE_ABSOLUTEPATH;
import static emissary.core.constants.Parameters.FILE_DATE;
import static emissary.core.constants.Parameters.ORIGINAL_FILENAME;
import static org.junit.jupiter.api.Assertions.assertEquals;
Expand Down Expand Up @@ -525,7 +527,6 @@ void testGetFileType() {
@Test
void testExtractUniqueFileExtensions() {
// these should be constants
final String FILEXT = "FILEXT";
DropOffUtil util = new DropOffUtil();

final IBaseDataObject bdo = new BaseDataObject();
Expand All @@ -550,6 +551,37 @@ void testExtractUniqueFileExtensions() {
assertTrue(fileExts.contains("mp3"), "FILEXT values should contain \"mp3\"");
}

@Test
void testExtractFileExtensionsWithFullFilepaths() {
    final DropOffUtil dropOffUtil = new DropOffUtil();

    // Each index is one scenario: only FILE_ABSOLUTEPATH set, only Original-Filename set, both set, or neither set
    final String[] absolutePaths = {"D:\\Users\\jdoe\\Documents\\Taxes 2023.csv", "", "/paper.abc.zzz",
            "/home/jdoe/SHARED_D.IR/cat.mov", "/home/user/.bashrc", ""};
    final String[] originalNames = {"", "D:\\Users\\jdoe\\interesting.folder\\a.table", "flowers.456.123",
            "/home/jdoe/SHARED_D.IR/cat", "taxes.thisfileextensionistoolong", ""};

    // Expected FILEXT values for the scenario at the same index
    final String[][] expectedExtensions = {{"csv"}, {"table"}, {"zzz", "123"}, {"mov"}, {"bashrc"}, {}};

    final IBaseDataObject payload = new BaseDataObject();

    int scenario = 0;
    while (scenario < absolutePaths.length) {
        final String[] expected = expectedExtensions[scenario];

        payload.setParameter(FILE_ABSOLUTEPATH, absolutePaths[scenario]);
        payload.setParameter(ORIGINAL_FILENAME, originalNames[scenario]);
        dropOffUtil.extractUniqueFileExtensions(payload);

        if (expected.length == 0) {
            assertFalse(payload.hasParameter(FILEXT));
        }
        for (final String ext : expected) {
            assertEquals(expected.length, payload.getParameter(FILEXT).size(), "Only "
                    + expected.length + " file extensions should have been extracted");
            assertTrue(payload.getParameter(FILEXT).contains(ext), "FILEXT should be extracted");
        }

        // start the next scenario from a clean object
        payload.clearParameters();
        scenario++;
    }
}

@Test
void testCleanSpecPath() {
assertEquals("/this/is/fine", util.cleanSpecPath("/this/is/fine"));
Expand All @@ -558,6 +590,39 @@ void testCleanSpecPath() {
assertEquals("/this/./././/./is/fine", util.cleanSpecPath("/this/....../../..//./is/fine"));
}

@Test
void testGetFullFilepathsFromParams() {
    IBaseDataObject ibdo = new BaseDataObject();
    List<String> bestFilenames;

    // blank values in both fields: nothing to extract
    ibdo.setParameter(ORIGINAL_FILENAME, "");
    ibdo.setParameter(FILE_ABSOLUTEPATH, "");
    bestFilenames = DropOffUtil.getFullFilepathsFromParams(ibdo);
    assertEquals(0, bestFilenames.size(), "No filename should have been found");

    // only FILE_ABSOLUTEPATH carries a value
    ibdo.setParameter(FILE_ABSOLUTEPATH, "theOtherFile.csv");
    bestFilenames = DropOffUtil.getFullFilepathsFromParams(ibdo);
    assertEquals(1, bestFilenames.size(), "There should be one filename extracted");
    assertEquals("theOtherFile.csv", bestFilenames.get(0), "The FILE_ABSOLUTEPATH should have been extracted");

    // both fields carry a value: Original-Filename is reported first, then FILE_ABSOLUTEPATH
    ibdo.setParameter(ORIGINAL_FILENAME, "file.docx");
    bestFilenames = DropOffUtil.getFullFilepathsFromParams(ibdo);
    assertEquals(2, bestFilenames.size(), "There should be two filenames extracted");
    assertEquals("file.docx", bestFilenames.get(0), "The Original-Filename should have been extracted");
    assertEquals("theOtherFile.csv", bestFilenames.get(1), "The FILE_ABSOLUTEPATH should have been extracted");
}

@Test
void getFullFilepathsFromParamsCustomFields() {
    // only the explicitly requested field should be consulted, even though Original-Filename is also set
    final String[] fieldsToCheck = {"CustomField"};
    final IBaseDataObject payload = new BaseDataObject();
    payload.setParameter("CustomField", "customName.txt");
    payload.setParameter(ORIGINAL_FILENAME, "groceries.xml");

    final List<String> found = DropOffUtil.getFullFilepathsFromParams(payload, fieldsToCheck);

    assertEquals(1, found.size(), "Only one filename should have been extracted");
    assertEquals("customName.txt", found.get(0), "Only the value in CustomField should have been extracted");
}

private void setupMetadata(IBaseDataObject bdo, String fieldValue, DropOffUtil.FileTypeCheckParameter fileTypeCheckParameter) {
bdo.clearParameters();
bdo.putParameter(fileTypeCheckParameter.getFieldName(), fieldValue);
Expand Down

0 comments on commit e8c0768

Please sign in to comment.