Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional duplicate timeseries index skip in CSV #3277

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@ public class IrregularTimeSeriesIndex extends AbstractTimeSeriesIndex {

public IrregularTimeSeriesIndex(long[] times) {
this.times = Objects.requireNonNull(times);
for (int i = 1; i < times.length; i++) {
if (times[i] <= times[i - 1]) {
throw new IllegalArgumentException("Time list should be sorted and without duplicate values");
}
}
if (times.length == 0) {
throw new IllegalArgumentException("Empty time list");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.time.ZonedDateTime;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -290,19 +291,28 @@ void parseToken(int i, String token) {
}
}

void parseLine(String[] tokens) {
void parseLineDuplicate(String[] tokens) {
long time = parseTokenTime(tokens[0]);
if (times.isEmpty() || times.get(times.size() - 1) != time) {
for (int i = fixedColumns; i < tokens.length; i++) {
String token = tokens[i] != null ? tokens[i].trim() : "";
parseToken(i, token);
}
parseTokenData(tokens);
times.add(time);
} else {
LOGGER.warn("Row with the same time have already been read, the row will be skipped");
}
}

void parseLine(String[] tokens) {
parseTokenData(tokens);
times.add(parseTokenTime(tokens[0]));
}

void parseTokenData(String[] tokens) {
for (int i = fixedColumns; i < tokens.length; i++) {
String token = tokens[i] != null ? tokens[i].trim() : "";
parseToken(i, token);
}
}

long parseTokenTime(String token) {
TimeFormat timeFormat = timeSeriesCsvConfig.timeFormat();
return switch (timeFormat) {
Expand Down Expand Up @@ -392,6 +402,9 @@ private long checkRegularSpacing() {
static void readCsvValues(ResultIterator<String[], ParsingContext> iterator, CsvParsingContext context,
Map<Integer, List<TimeSeries>> timeSeriesPerVersion, ReportNode reportNode) {
int currentVersion = Integer.MIN_VALUE;
Consumer<String[]> lineparser = context.timeSeriesCsvConfig.isSkipDuplicateTimeEntry()
? context::parseLineDuplicate
: context::parseLine;
while (iterator.hasNext()) {
String[] tokens = iterator.next();

Expand All @@ -407,8 +420,7 @@ static void readCsvValues(ResultIterator<String[], ParsingContext> iterator, Csv
context.reInit();
currentVersion = version;
}

context.parseLine(tokens);
lineparser.accept(tokens);
}
timeSeriesPerVersion.put(currentVersion, context.createTimeSeries());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,44 +17,54 @@
*/
public class TimeSeriesCsvConfig {
private static final int DEFAULT_MAX_COLUMNS = 20000;
private static final boolean DEFAULT_SKIP_DUPLICATE = false;

private final DateTimeFormatter dateTimeFormatter;
private final boolean versioned;
private final TimeFormat timeFormat;
private final char separator;
private final int maxColumns;
private final boolean strictVersioningImport;
private final boolean skipDuplicateTimeEntry;

public TimeSeriesCsvConfig() {
this(ZoneId.systemDefault(), TimeSeriesConstants.DEFAULT_SEPARATOR, true, TimeFormat.DATE_TIME, DEFAULT_MAX_COLUMNS, true);
this(ZoneId.systemDefault(), TimeSeriesConstants.DEFAULT_SEPARATOR, true, TimeFormat.DATE_TIME, DEFAULT_MAX_COLUMNS, true, DEFAULT_SKIP_DUPLICATE);
}

public TimeSeriesCsvConfig(ZoneId zoneId) {
this(zoneId, TimeSeriesConstants.DEFAULT_SEPARATOR, true, TimeFormat.DATE_TIME, DEFAULT_MAX_COLUMNS, true);
this(zoneId, TimeSeriesConstants.DEFAULT_SEPARATOR, true, TimeFormat.DATE_TIME, DEFAULT_MAX_COLUMNS, true, DEFAULT_SKIP_DUPLICATE);
}

public TimeSeriesCsvConfig(char separator, boolean versioned, TimeFormat timeFormat) {
this(ZoneId.systemDefault(), separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, true);
this(ZoneId.systemDefault(), separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, true, DEFAULT_SKIP_DUPLICATE);
}

public TimeSeriesCsvConfig(char separator, boolean versioned, TimeFormat timeFormat, boolean strictVersioningImport) {
this(ZoneId.systemDefault(), separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, strictVersioningImport);
this(ZoneId.systemDefault(), separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, strictVersioningImport, DEFAULT_SKIP_DUPLICATE);
}

public TimeSeriesCsvConfig(char separator, boolean versioned, TimeFormat timeFormat, boolean strictVersioningImport,
boolean skipDuplicateTimeEntry) {
this(ZoneId.systemDefault(), separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, strictVersioningImport, skipDuplicateTimeEntry);
}

public TimeSeriesCsvConfig(ZoneId zoneId, char separator, boolean versioned, TimeFormat timeFormat) {
this(zoneId, separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, true);
this(zoneId, separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, true, DEFAULT_SKIP_DUPLICATE);
}

public TimeSeriesCsvConfig(ZoneId zoneId, char separator, boolean versioned, TimeFormat timeFormat, boolean strictVersioningImport) {
this(zoneId, separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, strictVersioningImport);
this(zoneId, separator, versioned, timeFormat, DEFAULT_MAX_COLUMNS, strictVersioningImport, DEFAULT_SKIP_DUPLICATE);
}

public TimeSeriesCsvConfig(ZoneId zoneId, char separator, boolean versioned, TimeFormat timeFormat, int maxColumns, boolean strictVersioningImport) {
public TimeSeriesCsvConfig(ZoneId zoneId, char separator, boolean versioned, TimeFormat timeFormat, int maxColumns,
boolean strictVersioningImport, boolean skipDuplicateTimeEntry) {
this.dateTimeFormatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME.withZone(zoneId);
this.separator = separator;
this.versioned = versioned;
this.timeFormat = timeFormat;
this.maxColumns = maxColumns;
this.strictVersioningImport = strictVersioningImport;
this.skipDuplicateTimeEntry = skipDuplicateTimeEntry;
}

public char separator() {
Expand All @@ -80,4 +90,8 @@ public int getMaxColumns() {
public boolean withStrictVersioningImport() {
return strictVersioningImport;
}

public boolean isSkipDuplicateTimeEntry() {
return skipDuplicateTimeEntry;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,7 @@ void testEquals() {
}

@Test
void testContructorError() {
void testConstructorError() {
assertThrows(IllegalArgumentException.class, IrregularTimeSeriesIndex::create);
long[] duplicates = {0L, 1L, 1L};
assertThrows(IllegalArgumentException.class, () -> new IrregularTimeSeriesIndex(duplicates));
long[] unordered = {0L, 2L, 1L};
assertThrows(IllegalArgumentException.class, () -> new IrregularTimeSeriesIndex(unordered));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -145,19 +145,38 @@ void testFractionsOfSecondsRegularTimeSeriesIndex() {

@Test
void testFractionsOfSecondsRegularTimeSeriesIndexWithDuplicateTime() {
String csv = """
Time;Version;ts1;ts2
0.000;1;1.0;
0.001;1;;a
0.0012;1;3.0;b
0.000;2;4.0;c
0.001;2;5.0;
0.0012;2;6.0;d
""".replaceAll("\n", System.lineSeparator());

TimeSeriesCsvConfig timeSeriesCsvConfig = new TimeSeriesCsvConfig(';', true, TimeFormat.FRACTIONS_OF_SECOND, true);
Map<Integer, List<TimeSeries>> timeSeriesPerVersion = TimeSeries.parseCsv(csv, timeSeriesCsvConfig);

assertOnParsedTimeSeries(timeSeriesPerVersion, IrregularTimeSeriesIndex.class);
}

@Test
void testFractionsOfSecondsRegularTimeSeriesIndexWithSkippedDuplicateTime() {
String csv = """
Time;Version;ts1;ts2
0.000;1;1.0;
0.001;1;;a
0.0015;1;;b
0.002;1;3.0;b
0.000;2;4.0;c
0.0002;2;4.5;c
0.001;2;5.0;
0.0015;2;3.0;b
0.002;2;6.0;d
""".replaceAll("\n", System.lineSeparator());

TimeSeriesCsvConfig timeSeriesCsvConfig = new TimeSeriesCsvConfig(';', true, TimeFormat.FRACTIONS_OF_SECOND, true);
TimeSeriesCsvConfig timeSeriesCsvConfig = new TimeSeriesCsvConfig(';', true,
TimeFormat.FRACTIONS_OF_SECOND, true, true);
Map<Integer, List<TimeSeries>> timeSeriesPerVersion = TimeSeries.parseCsv(csv, timeSeriesCsvConfig);

assertOnParsedTimeSeries(timeSeriesPerVersion, RegularTimeSeriesIndex.class);
Expand Down
Loading