Skip to content

Commit

Permalink
updating FlexibleDateTimeFormatter to cover edge cases (#770)
Browse files Browse the repository at this point in the history
  • Loading branch information
arp-0984 authored Jun 18, 2024
1 parent ed4fa92 commit 0b64744
Show file tree
Hide file tree
Showing 3 changed files with 716 additions and 48 deletions.
124 changes: 107 additions & 17 deletions src/main/java/emissary/util/FlexibleDateTimeParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,18 @@
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;

Expand All @@ -35,13 +38,14 @@
* <p>
* Natty - It handled a good chunk of the formats but not all.
*/
public class FlexibleDateTimeParser {
public final class FlexibleDateTimeParser {

/* Logger */
private static final Logger logger = LoggerFactory.getLogger(FlexibleDateTimeParser.class);

/* Configuration Variables */
private static final String CFG_FORMAT = "FORMAT_DATETIME";
private static final String CFG_FORMAT_MAIN = "FORMAT_DATETIME_MAIN";
private static final String CFG_FORMAT_EXTRA = "FORMAT_DATETIME_EXTRA";
private static final String CFG_TIMEZONE = "TIMEZONE";
private static final String DEFAULT_TIMEZONE = "GMT";
private static final String SPACE = " ";
Expand All @@ -50,14 +54,25 @@ public class FlexibleDateTimeParser {
/* Remove all tabs and extra spaces */
private static final Pattern REPLACE = Pattern.compile("\t+|[ ]+", Pattern.DOTALL);

/* Remove other junk */
private static final Pattern REMOVE = Pattern.compile("<.+?>$|=0D$", Pattern.DOTALL);
/*
* Remove other junk -- anything in an html tag, all parenthesis and quotes, and any non-word characters at the
* beginning or end
*/
private static final Pattern REMOVE = Pattern.compile("<.+?>$|=0D$|\\(|\\)|\"|\\[|]|\\W+$|^\\W+", Pattern.DOTALL);
/*
* This is our last ditch parsing effort if we failed to parse the string - remove all extra text after the numeric time
* zone offset
*/
private static final Pattern EXTRA_TEXT_REMOVE = Pattern.compile("(\\+\\d{4}).*$");

/* timezone - config var: TIMEZONE */
private static ZoneId timezone = ZoneId.of(DEFAULT_TIMEZONE);

/* date time formats - vars: FORMAT_DATETIME */
private static List<DateTimeFormatter> dateFormats = new ArrayList<>();
/* date time formats - vars: FORMAT_DATETIME_MAIN */
private static List<DateTimeFormatter> dateFormatsMain = new ArrayList<>();

/* Extra date time formats - list to try if our main list has failed - vars: FORMAT_DATETIME_EXTRA */
private static List<DateTimeFormatter> dateFormatsExtra = new ArrayList<>();

/* init */
static {
Expand All @@ -74,13 +89,74 @@ public static ZoneId getTimezone() {
}

/**
* Attempts to parse a string date using pre-configured patterns
* Attempts to parse a string date using pre-configured patterns. Default not trying the extensive date/time format list
*
* @param dateString the string to parse
* @return the parsed immutable and thread-safe zoned-date, or null if it failed to parse
*/
public static ZonedDateTime parse(final String dateString) {
return parse(dateString, dateFormats);
return parse(dateString, false);
}

/**
* Attempts to parse a string date using pre-configured patterns
*
* @param dateString the string to parse
* @param tryExtensiveParsing True if we want to try out complete list of date/time formats False if we only want to
* attempt the most common date/time formats
* @return the parsed immutable and thread-safe zoned-date, or null if it failed to parse
*/
public static ZonedDateTime parse(final String dateString, boolean tryExtensiveParsing) {
ZonedDateTime zdt = parsingHelper(dateString, tryExtensiveParsing);

if (zdt != null || !tryExtensiveParsing) {
return zdt;
} else {
// if that all failed and we want to attempt extensive parsing, attempt the last ditch efforts we can try
return lastDitchParsingEffort(dateString);
}
}

/**
* If all our formats failed to parse a date string, give it one last try to parse it. Look for a numeric offset (e.g.
* +0000) and remove all text afterwards. This should cover another set of cases where there is random text appended to
* the end of the string, as well as removing invalid non-numeric time zone offsets while still picking up the numeric
* offset Assumption - that tryExtensiveParsing is true - we should only get to this point if we want to try our best to
* parse
*
* @param date The date string to parse
* @return the ZonedDateTime object if removing text at the end was successful, or null otherwise
*/
static ZonedDateTime lastDitchParsingEffort(final String date) {

// Attempt to remove all text after the numeric offset and try again - this should give us a valid date string
// to work with
Matcher matcher = EXTRA_TEXT_REMOVE.matcher(date);
if (matcher.find()) {
String secondChanceDate = matcher.replaceAll(matcher.group(1));
// if we removed text, attempt to parse again to see if we are more successful this time
return parsingHelper(secondChanceDate, true);
}
return null;
}

/**
* Created to help against code duplication. Calls parse with the standard set of date formats, and then if that fails,
* attempt the extra set of date formats if tryExtensiveParsing is set to true.
*
* @param dateString The string we are attempting to parse
* @param tryExtensiveParsing Whether or not to use the extensive set of date formats
* @return The ZonedDateTime object if our parsing was successful, or null if not
*/
private static ZonedDateTime parsingHelper(final String dateString, boolean tryExtensiveParsing) {
ZonedDateTime zdt = parse(dateString, dateFormatsMain);

// if we got a successful parse or we don't want to attempt "extensive parsing", return here
if (!tryExtensiveParsing || zdt != null) {
return zdt;
}
zdt = parse(dateString, dateFormatsExtra);
return zdt;
}

/**
Expand Down Expand Up @@ -116,9 +192,12 @@ public static ZonedDateTime parse(final String dateString, final List<DateTimeFo

try {
// try for a zoned date (has timezone), local date time (no time zone), or just a local date (no time)
TemporalAccessor accessor = formatter.parseBest(cleanedDateString, ZonedDateTime::from, LocalDateTime::from, LocalDate::from);
TemporalAccessor accessor =
formatter.parseBest(cleanedDateString, ZonedDateTime::from, OffsetDateTime::from, LocalDateTime::from, LocalDate::from);
if (accessor instanceof ZonedDateTime) {
return (ZonedDateTime) accessor; // return the date time w/ timezone
} else if (accessor instanceof OffsetDateTime) {
return ((OffsetDateTime) accessor).atZoneSameInstant(timezone);
} else if (accessor instanceof LocalDateTime) {
return ((LocalDateTime) accessor).atZone(timezone); // set the timezone
} else if (accessor instanceof LocalDate) {
Expand All @@ -143,7 +222,7 @@ private static void configure() {
// fire up the configurator
Configurator configurator = ConfigUtil.getConfigInfo(FlexibleDateTimeParser.class);
setupTimezone(configurator.findStringEntry(CFG_TIMEZONE, DEFAULT_TIMEZONE));
setupDateFormats(configurator.findStringMatchEntries(CFG_FORMAT));
setupDateFormats(configurator.findStringMatchEntries(CFG_FORMAT_MAIN), configurator.findStringMatchEntries(CFG_FORMAT_EXTRA));
} catch (IOException e) {
throw new IllegalArgumentException("Could not configure parser!!", e);
}
Expand All @@ -170,13 +249,23 @@ private static void setupTimezone(final String configTimezone) {
/**
* Override the default date formats
*
* @param configEntries the list of override formats from the config file
* @param configEntriesMain the list of main override formats from the config file
* @param configEntriesExtra the list of extra override formats from the config file
*/
private static void setupDateFormats(final List<ConfigEntry> configEntries) {
List<DateTimeFormatter> dateTimeFormats = getConfigFormats(configEntries);
if (CollectionUtils.isNotEmpty(dateTimeFormats)) {
dateFormats = Collections.unmodifiableList(dateTimeFormats);
logger.debug("Created successfully. Created {} of {} formats from config", dateFormats.size(), configEntries.size());
private static void setupDateFormats(final List<ConfigEntry> configEntriesMain, final List<ConfigEntry> configEntriesExtra) {
List<DateTimeFormatter> dateTimeFormatsMain = getConfigFormats(configEntriesMain);
if (CollectionUtils.isNotEmpty(dateTimeFormatsMain)) {
dateFormatsMain = Collections.unmodifiableList(dateTimeFormatsMain);
logger.debug("Created successfully. Created {} of {} formats from config", dateFormatsMain.size(), configEntriesMain.size());
} else {
logger.error("Could not create with configured variables");
throw new IllegalArgumentException("No date/time formats configured!!");
}

List<DateTimeFormatter> dateTimeFormatsExtra = getConfigFormats(configEntriesExtra);
if (CollectionUtils.isNotEmpty(dateTimeFormatsExtra)) {
dateFormatsExtra = Collections.unmodifiableList(dateTimeFormatsExtra);
logger.debug("Created successfully. Created {} of {} formats from config", dateFormatsExtra.size(), configEntriesExtra.size());
} else {
logger.error("Could not create with configured variables");
throw new IllegalArgumentException("No date/time formats configured!!");
Expand Down Expand Up @@ -206,7 +295,7 @@ private static List<DateTimeFormatter> getConfigFormats(final List<ConfigEntry>
@Nullable
private static DateTimeFormatter getFormatter(ConfigEntry entry) {
try {
return DateTimeFormatter.ofPattern(entry.getValue());
return new DateTimeFormatterBuilder().parseCaseInsensitive().appendPattern(entry.getValue()).toFormatter();
} catch (IllegalArgumentException e) {
// log the bad one and move on because there could be other possible patterns
logger.error("Error parsing pattern [{}]: {}", entry.getValue(), e.getLocalizedMessage());
Expand All @@ -229,6 +318,7 @@ private static String cleanDateString(final String date) {
String cleanedDateString = StringUtils.substring(date, 0, 100);
cleanedDateString = REPLACE.matcher(cleanedDateString).replaceAll(SPACE);
cleanedDateString = REMOVE.matcher(cleanedDateString).replaceAll(EMPTY);

return StringUtils.trimToNull(cleanedDateString);
}

Expand Down
Loading

0 comments on commit 0b64744

Please sign in to comment.