Skip to content

Commit

Permalink
NIFI-14200 Replaced deprecated commons-csv constructs with Collection…
Browse files Browse the repository at this point in the history
… methods (#9670)

Signed-off-by: David Handermann <[email protected]>
  • Loading branch information
dan-s1 authored Jan 29, 2025
1 parent cc270ff commit 2414046
Show file tree
Hide file tree
Showing 14 changed files with 59 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ private static CSVFormat buildCustomFormat(final PropertyContext context, final
builder = builder.setDuplicateHeaderMode(mode);
}

return builder.build();
return builder.get();
}

public static String unescape(String input) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,14 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -67,7 +65,7 @@
)
public class CSVRecordLookupService extends AbstractCSVLookupService implements RecordLookupService {

private static final Set<String> REQUIRED_KEYS = Collections.unmodifiableSet(Stream.of(KEY).collect(Collectors.toSet()));
private static final Set<String> REQUIRED_KEYS = Set.of(KEY);

static final PropertyDescriptor CSV_FORMAT = new PropertyDescriptor.Builder()
.fromPropertyDescriptor(AbstractCSVLookupService.CSV_FORMAT)
Expand All @@ -88,7 +86,7 @@ protected void loadCache() throws IllegalStateException, IOException {
ConcurrentHashMap<String, Record> cache = new ConcurrentHashMap<>();
try (final InputStream is = new FileInputStream(csvFile)) {
try (final InputStreamReader reader = new InputStreamReader(is, charset)) {
final CSVParser records = csvFormat.builder().setHeader().setSkipHeaderRecord(true).build().parse(reader);
final CSVParser records = csvFormat.builder().setHeader().setSkipHeaderRecord(true).get().parse(reader);
RecordSchema lookupRecordSchema = null;
for (final CSVRecord record : records) {
final String key = record.get(lookupKeyColumn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,13 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.Restricted;
Expand Down Expand Up @@ -58,7 +56,7 @@
)
public class SimpleCsvFileLookupService extends AbstractCSVLookupService implements StringLookupService {

private static final Set<String> REQUIRED_KEYS = Collections.unmodifiableSet(Stream.of(KEY).collect(Collectors.toSet()));
private static final Set<String> REQUIRED_KEYS = Set.of(KEY);

public static final PropertyDescriptor LOOKUP_VALUE_COLUMN =
new PropertyDescriptor.Builder()
Expand Down Expand Up @@ -86,7 +84,7 @@ protected void loadCache() throws IllegalStateException, IOException {
final Map<String, String> properties = new HashMap<>();
try (final InputStream is = new FileInputStream(csvFile)) {
try (final InputStreamReader reader = new InputStreamReader(is, charset)) {
final Iterable<CSVRecord> records = csvFormat.builder().setHeader().setSkipHeaderRecord(true).build().parse(reader);
final Iterable<CSVRecord> records = csvFormat.builder().setHeader().setSkipHeaderRecord(true).get().parse(reader);
for (final CSVRecord record : records) {
final String key = record.get(lookupKeyColumn);
final String value = record.get(lookupValueColumn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ public RecordSchema getSchema(Map<String, String> variables, final InputStream c
try {
CSVFormat csvFormat = CSVUtils.createCSVFormat(context, variables);
if (!csvFormat.getSkipHeaderRecord()) {
csvFormat = csvFormat.builder().setHeader().setSkipHeaderRecord(true).build();
csvFormat = csvFormat.builder().setHeader().setSkipHeaderRecord(true).get();
}

try (final InputStream bomInputStream = BOMInputStream.builder().setInputStream(contentStream).get();
final Reader reader = new InputStreamReader(bomInputStream);
final CSVParser csvParser = new CSVParser(reader, csvFormat)) {
final CSVParser csvParser = CSVParser.builder().setReader(reader).setFormat(csvFormat).get()) {

final List<RecordField> fields = new ArrayList<>();
for (final String columnName : csvParser.getHeaderMap().keySet()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ public CSVRecordReader(final InputStream in, final ComponentLog logger, final Re
withHeader = csvFormat.builder().setHeader(schema.getFieldNames().toArray(new String[0]));
}

csvParser = new CSVParser(reader, withHeader.build());
csvParser = CSVParser.builder()
.setReader(reader)
.setFormat(withHeader.get())
.get();
}

public CSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema, final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand All @@ -48,9 +46,12 @@ public CSVRecordSource(final InputStream in, final PropertyContext context, fina
throw new ProcessException(e);
}

final CSVFormat csvFormat = CSVUtils.createCSVFormat(context, variables).builder().setHeader().setSkipHeaderRecord(true).setTrim(true).build();
final CSVParser csvParser = new CSVParser(reader, csvFormat);
fieldNames = Collections.unmodifiableList(new ArrayList<>(csvParser.getHeaderMap().keySet()));
final CSVFormat csvFormat = CSVUtils.createCSVFormat(context, variables).builder().setHeader().setSkipHeaderRecord(true).setTrim(true).get();
final CSVParser csvParser = CSVParser.builder()
.setReader(reader)
.setFormat(csvFormat)
.get();
fieldNames = List.copyOf(csvParser.getHeaderMap().keySet());

csvRecordIterator = csvParser.iterator();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public WriteCSVResult(final CSVFormat csvFormat, final RecordSchema recordSchema
this.timestampFormat = timestampFormat;
this.includeHeaderLine = includeHeaderLine;

final CSVFormat formatWithHeader = csvFormat.builder().setSkipHeaderRecord(true).build();
final CSVFormat formatWithHeader = csvFormat.builder().setSkipHeaderRecord(true).get();
final OutputStreamWriter streamWriter = new OutputStreamWriter(out, charSet);
printer = new CSVPrinter(streamWriter, formatWithHeader);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

public class ITApacheCSVRecordReader {

private final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').build();
private final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').get();

private List<RecordField> getDefaultFields() {
return createStringFields(new String[]{"id", "name", "balance", "address", "city", "state", "zipCode", "country"});
Expand Down Expand Up @@ -76,7 +76,7 @@ public void testParserPerformance() throws IOException, MalformedRecordException

@Test
public void testExceptionThrownOnParseProblem() {
CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setQuoteMode(QuoteMode.ALL).setTrim(true).setDelimiter(',').build();
CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setQuoteMode(QuoteMode.ALL).setTrim(true).setDelimiter(',').get();
final int NUM_LINES = 25;
StringBuilder sb = new StringBuilder("\"id\",\"name\",\"balance\"");
for (int i = 0; i < NUM_LINES; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

public class ITJacksonCSVRecordReader {

private final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').build();
private final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').get();

private List<RecordField> getDefaultFields() {
final List<RecordField> fields = new ArrayList<>();
Expand All @@ -51,13 +51,11 @@ private List<RecordField> getDefaultFields() {
public void testParserPerformance() throws IOException, MalformedRecordException {
// Generates about 130MB of data
final int NUM_LINES = 2500000;
StringBuilder sb = new StringBuilder("id,name,balance,address,city,state,zipCode,country\n");
for (int i = 0; i < NUM_LINES; i++) {
sb.append("1,John Doe,4750.89D,123 My Street,My City,MS,11111,USA\n");
}
String sb = "id,name,balance,address,city,state,zipCode,country\n" +
"1,John Doe,4750.89D,123 My Street,My City,MS,11111,USA\n".repeat(NUM_LINES);
final RecordSchema schema = new SimpleRecordSchema(getDefaultFields());

try (final InputStream bais = new ByteArrayInputStream(sb.toString().getBytes());
try (final InputStream bais = new ByteArrayInputStream(sb.getBytes());
final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@

public class TestCSVRecordReader {
private final DataType doubleDataType = RecordFieldType.DOUBLE.getDataType();
private final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').build();
private final CSVFormat RFC4180WithTrim = CSVFormat.RFC4180.builder().setTrim(true).build();
private final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').get();
private final CSVFormat RFC4180WithTrim = CSVFormat.RFC4180.builder().setTrim(true).get();

private List<RecordField> getDefaultFields() {
final List<RecordField> fields = new ArrayList<>();
Expand Down Expand Up @@ -807,7 +807,7 @@ public void testDuplicateHeaderNames() throws IOException, MalformedRecordExcept
.setTrim(true)
.setQuote('"')
.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
.build();
.get();

try (final InputStream bais = new ByteArrayInputStream(inputData)) {
final IllegalArgumentException iae = assertThrows(IllegalArgumentException.class, () -> createReader(bais, schema, disallowDuplicateHeadersFormat));
Expand Down Expand Up @@ -848,7 +848,7 @@ public void testDuplicateHeaderNames_withoutDoubleQuoteTrimming() throws IOExcep
}

// confirm duplicate headers cause an exception when requested
final CSVFormat disallowDuplicateHeadersFormat = RFC4180WithTrim.builder().setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW).build();
final CSVFormat disallowDuplicateHeadersFormat = RFC4180WithTrim.builder().setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW).get();
try (final InputStream bais = new ByteArrayInputStream(inputData)) {
final IllegalArgumentException iae = assertThrows(IllegalArgumentException.class, () -> createReader(bais, schema, disallowDuplicateHeadersFormat, false));
assertTrue(iae.getMessage().startsWith("The header contains a duplicate name"));
Expand All @@ -860,7 +860,7 @@ public void testMultipleRecordsDelimitedWithSpecialChar() throws IOException, Ma

char delimiter = StringEscapeUtils.unescapeJava("\u0001").charAt(0);

final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').setDelimiter(delimiter).build();
final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').setDelimiter(delimiter).get();
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);

Expand All @@ -884,7 +884,7 @@ public void testMultipleRecordsDelimitedWithSpecialChar() throws IOException, Ma
@Test
public void testMultipleRecordsEscapedWithChar() throws IOException {

final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setTrim(true).setQuote('"').setDelimiter(',').setEscape('\\').build();
final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setTrim(true).setQuote('"').setDelimiter(',').setEscape('\\').get();
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);

Expand All @@ -900,7 +900,7 @@ public void testMultipleRecordsEscapedWithChar() throws IOException {
@Test
public void testMultipleRecordsEscapedWithNull() throws IOException, MalformedRecordException {

final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').setDelimiter(',').setEscape(null).build();
final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').setDelimiter(',').setEscape(null).get();
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);

Expand Down Expand Up @@ -945,7 +945,7 @@ public void testMultipleRecordsEscapedWithNull_withoutDoubleQuoteTrimming() thro

@Test
public void testQuote() throws IOException, MalformedRecordException {
final CSVFormat format = CSVFormat.RFC4180.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').build();
final CSVFormat format = CSVFormat.RFC4180.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').get();
final String text = "\"name\"\n\"\"\"\"\"\"\"\"\n\"\"\"\"\"\"\"\"";

final List<RecordField> fields = new ArrayList<>();
Expand All @@ -968,7 +968,7 @@ record = reader.nextRecord(false, false);

@Test
public void testQuote_withoutDoubleQuoteTrimming() throws IOException, MalformedRecordException {
final CSVFormat format = CSVFormat.RFC4180.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').build();
final CSVFormat format = CSVFormat.RFC4180.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').get();
final String text = "\"name\"\n\"\"\"\"\"\"\"\"\n\"\"\"\"\"\"\"\"";

final List<RecordField> fields = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
public class TestFastCSVRecordReader {
private final DataType doubleDataType = RecordFieldType.DOUBLE.getDataType();
private CSVFormat format;
private static final CSVFormat TRIMMED_RFC4180 = CSVFormat.RFC4180.builder().setTrim(true).build();
private static final CSVFormat TRIMMED_RFC4180 = CSVFormat.RFC4180.builder().setTrim(true).get();

@BeforeEach
public void setUp() {
Expand Down Expand Up @@ -231,7 +231,7 @@ public void testMissingField_withoutDoubleQuoteTrimming() throws IOException, Ma
final byte[] inputData = csvData.getBytes();

try (final InputStream bais = new ByteArrayInputStream(inputData);
final FastCSVRecordReader reader = createReader(bais, schema, TRIMMED_RFC4180.builder().setAllowMissingColumnNames(true).build(), false)) {
final FastCSVRecordReader reader = createReader(bais, schema, TRIMMED_RFC4180.builder().setAllowMissingColumnNames(true).get(), false)) {

final Record record = reader.nextRecord();
assertNotNull(record);
Expand Down Expand Up @@ -367,7 +367,7 @@ public void testFieldInSchemaButNotHeader() throws IOException, MalformedRecordE
.setTrim(true)
.setIgnoreSurroundingSpaces(true)
.setAllowMissingColumnNames(true)
.build();
.get();

// Create another Record Reader that indicates that the header line is present but should be ignored. This should cause
// our schema to be the definitive list of what fields exist.
Expand Down Expand Up @@ -452,7 +452,7 @@ public void testMultipleRecordsDelimitedWithSpecialChar() throws IOException, Ma

char delimiter = StringEscapeUtils.unescapeJava("\u0001").charAt(0);

final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').setDelimiter(delimiter).build();
final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setSkipHeaderRecord(true).setTrim(true).setQuote('"').setDelimiter(delimiter).get();
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);

Expand Down Expand Up @@ -483,7 +483,7 @@ public void testMultipleRecordsEscapedWithNull() throws IOException, MalformedRe
.setQuote('"')
.setDelimiter(",")
.setEscape(null)
.build();
.get();
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,17 @@ public class TestJacksonCSVRecordReader {
.setSkipHeaderRecord(true)
.setTrim(true)
.setQuote('"')
.build();
.get();
private final CSVFormat formatWithNullRecordSeparator = CSVFormat.DEFAULT.builder()
.setHeader()
.setSkipHeaderRecord(true)
.setTrim(true)
.setQuote('"')
.setRecordSeparator(null)
.build();
.get();
private final CSVFormat trimmed4180 = CSVFormat.RFC4180.builder()
.setTrim(true)
.build();
.get();
private final CSVFormat customFormat = CSVFormat.DEFAULT.builder()
.setHeader()
.setSkipHeaderRecord(true)
Expand All @@ -70,7 +70,7 @@ public class TestJacksonCSVRecordReader {
.setDelimiter(',')
.setEscape('\\')
.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
.build();
.get();


private List<RecordField> getDefaultFields() {
Expand Down Expand Up @@ -671,7 +671,7 @@ public void testDuplicateHeaderNames_withoutDoubleQuoteTrimming() throws IOExcep
@Test
public void testMultipleRecordsDelimitedWithSpecialChar() throws IOException, MalformedRecordException {
final char delimiter = StringEscapeUtils.unescapeJava("\u0001").charAt(0);
final CSVFormat format = customFormat.builder().setDelimiter(delimiter).build();
final CSVFormat format = customFormat.builder().setDelimiter(delimiter).get();

final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
Expand All @@ -695,7 +695,7 @@ public void testMultipleRecordsDelimitedWithSpecialChar() throws IOException, Ma

@Test
public void testMultipleRecordsEscapedWithChar() throws IOException {
final CSVFormat format = customFormat.builder().setEscape('\\').build();
final CSVFormat format = customFormat.builder().setEscape('\\').get();
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);

Expand Down
Loading

0 comments on commit 2414046

Please sign in to comment.