diff --git a/README.md b/README.md index 6631980..5a8bab7 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # ods-reader -You want to import very large OpenDocument tables (ODS file) and worry about memory usage? In that case the opendocument-reader might be interesting for you. Instead of loading the entire document, it is a pull-based reader. +You want to import very large OpenDocument tables (ODS file) and worry about memory usage? In that case the ods-reader might be interesting for you. Instead of loading the entire document, it is a pull-based reader. It therefore has a very low memory consumption, even with huge documents. ## Usage -Here is an example how to use the opendocument-reader: +Here is an example of how to use the ods-reader: ```java Document doc = new Document(new File("myTable.ods")); Table table = doc.nextTable(); @@ -15,3 +15,27 @@ Here is an example how to use the opendocument-reader: } ``` + +## Cell types + +ODS distinguishes between the value that is displayed for humans and the machine-readable value. The value that is displayed for humans depends on the language selected for the document. The method `getContent()` is used to get the language-dependent human-readable value. + +The machine-readable value can be read using specialized methods that return suitable Java objects. 
Here is an example: + +```java +if ("date".equals(cell.getValueType())) { + if( cell.isDateTime() ) { + LocalDateTime dateTime = cell.asDateTime(); + } else { + LocalDate date = cell.asDate(); + } +} else if ("boolean".equals(cell.getValueType())) { + boolean b = cell.asBoolean(); +} else if ("time".equals(cell.getValueType())) { + LocalTime time = cell.asTime(); +} else if (cell.getValue() != null) { + result = cell.getValue(); +} else { + result = cell.getContent(); +} +``` \ No newline at end of file diff --git a/pom.xml b/pom.xml index 6f7b3a5..e2efcc0 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ de.zedlitz OpenDocument reader ods-reader - 1.1 + 1.0 streaming reader for OpenDocument files https://github.com/jze/opendocument-reader diff --git a/src/main/java/de/zedlitz/opendocument/Cell.java b/src/main/java/de/zedlitz/opendocument/Cell.java index 4587389..ac8ebdf 100644 --- a/src/main/java/de/zedlitz/opendocument/Cell.java +++ b/src/main/java/de/zedlitz/opendocument/Cell.java @@ -96,18 +96,34 @@ private static String getColumnName(int columnIndex) { return columnName.toString(); } + /** + * Get the raw value of the currency attribute. It is only present for cells with the type "currency". + * @return the raw currency value or null if not present. + */ public String getCurrency() { return currency; } + /** + * Get the raw value of the time-value attribute. It is only present for cells with the type "time". + * @return the raw time value or null if not present. + */ public String getTimeValue() { return timeValue; } + /** + * Get the raw value of the boolean-value attribute. It is only present for cells with the type "boolean". + * @return the raw boolean value or null if not present. + */ public String getBooleanValue() { return booleanValue; } + /** + * Get the raw value of the date-value attribute. It is only present for cells with the type "date". + * @return the raw date value or null if not present. 
+ */ public String getDateValue() { return dateValue; } @@ -123,7 +139,8 @@ private void skipNote(final XMLStreamReader parser) } /** - * @return Returns the valueType. + * Get the raw value of the value-type attribute. It should be present for every cell. + * @return Returns the valueType or null if not present. */ public String getValueType() { return this.valueType; @@ -133,7 +150,7 @@ public String getValueType() { * Returns the content of the cell formatted for the locale of the file. For example in a German ods file the * boolean value true will be returned as WAHR. In a French ods file it will be * VRAI. And in an English ods file it will be TRUE. - *

+ *

* If you are looking for a language independent value you can use the getValue method. */ public String getContent() { @@ -152,7 +169,14 @@ public String toString() { return String.format("[%s \"%s\"]", getValueType(), getContent()); } - public Boolean asBoolean() { + /** + * Return the boolean value of the cell. This works only for cells with the type "boolean". + * Check for "boolean".equals(cell.getValueType()) before invoking this method. + * + * @return the boolean value of the cell. + * @throws OdsReaderException if the cell is not a boolean cell + */ + public boolean asBoolean() { if ("boolean".equals(valueType) && StringUtils.isNotEmpty(booleanValue)) { return Boolean.valueOf(booleanValue); } @@ -160,6 +184,14 @@ public Boolean asBoolean() { throw new OdsReaderException("Wrong cell type " + valueType + " for boolean value"); } + /** + * Return a {@link LocalDate} object with the value of the cell. This works only for cells with the type "date" that + * do not have a time component. + * Check for "date".equals(cell.getValueType()) before invoking this method. + * + * @return a {@link LocalDate} object with the value of the cell. + * @throws OdsReaderException if the cell is not a date cell + */ public LocalDate asDate() { if ("date".equals(valueType) && StringUtils.isNotEmpty(dateValue)) { return LocalDate.parse(dateValue); @@ -168,6 +200,14 @@ public LocalDate asDate() { throw new OdsReaderException("Wrong cell type " + valueType + " for date value"); } + /** + * Return a {@link LocalDateTime} object with the value of the cell. This works only for cells with the type "date". + * Check for "date".equals(cell.getValueType()) before invoking this method. + * If the cell contains only a date and no time the time 00:00:00 will be used as the time component. + * + * @return a {@link LocalDateTime} object with the value of the cell. 
+ * @throws OdsReaderException if the cell is not a date cell + */ public LocalDateTime asDateTime() { if ("date".equals(valueType) && StringUtils.isNotEmpty(dateValue)) { if (dateValue.contains("T")) { @@ -175,13 +215,27 @@ public LocalDateTime asDateTime() { return LocalDateTime.parse(dateValue, DateTimeFormatter.ISO_DATE_TIME); } else { // only date - return LocalDateTime.parse(dateValue+"T00:00:00", DateTimeFormatter.ISO_DATE_TIME); + return LocalDateTime.parse(dateValue + "T00:00:00", DateTimeFormatter.ISO_DATE_TIME); } } throw new OdsReaderException("Wrong cell type " + valueType + " for date value"); } + /** + * Does the cell contain a value that consists of a date and a time? + */ + public boolean isDateTime() { + return "date".equals(valueType) && StringUtils.contains(dateValue, "T"); + } + + /** + * Return a {@link LocalTime} object with the value of the cell. This works only for cells with the type "time". + * Check for "time".equals(cell.getValueType()) before invoking this method. + * + * @return a {@link LocalTime} object with the value of the cell. + * @throws OdsReaderException if the cell is not a time cell + */ public LocalTime asTime() { if ("time".equals(valueType) && StringUtils.isNotEmpty(timeValue)) { Duration duration = Duration.parse(timeValue); @@ -200,7 +254,12 @@ public String getAddress() { } /** - * Returns the language independent value of a cell. + * Returns the language independent value of a cell. This is only present for cells with the type float, currency, + * and percentage. It is stored in the value attribute of the cell element. + *

+ * If getValue() is null you can use getContent() to get the text value of the cell. + * + * @return the language independent value of a cell or null if no value is present. */ public String getValue() { return value; diff --git a/src/main/java/de/zedlitz/opendocument/CellType.java b/src/main/java/de/zedlitz/opendocument/CellType.java deleted file mode 100644 index f3965b0..0000000 --- a/src/main/java/de/zedlitz/opendocument/CellType.java +++ /dev/null @@ -1,13 +0,0 @@ -package de.zedlitz.opendocument; - -public enum CellType { - NUMBER, - STRING, - FORMULA, - ERROR, - BOOLEAN, - EMPTY; - - CellType() { - } -} diff --git a/src/main/java/de/zedlitz/opendocument/Document.java b/src/main/java/de/zedlitz/opendocument/Document.java index 0b66d83..276abc3 100644 --- a/src/main/java/de/zedlitz/opendocument/Document.java +++ b/src/main/java/de/zedlitz/opendocument/Document.java @@ -108,7 +108,7 @@ public void eachTable(final Consumer c) { } } - public Optional getSheet(int i) { + public Optional
getTable(int i) { int count = 0; Table nextTable = this.nextTable(); while (nextTable != null) { @@ -120,4 +120,8 @@ public Optional getSheet(int i) { } return Optional.empty(); } + + public Optional getSheet(int i) { + return getTable(i).map(it -> it); + } } diff --git a/src/main/java/de/zedlitz/opendocument/ReadableWorkbook.java b/src/main/java/de/zedlitz/opendocument/ReadableWorkbook.java deleted file mode 100644 index ad01ae2..0000000 --- a/src/main/java/de/zedlitz/opendocument/ReadableWorkbook.java +++ /dev/null @@ -1,15 +0,0 @@ -package de.zedlitz.opendocument; - -import javax.xml.stream.XMLStreamException; -import java.io.File; -import java.io.IOException; - -/** - * Just a wrapper for the {@link Document} class to be a drop-in replacement for fastexcel. - */ -public class ReadableWorkbook extends Document { - - public ReadableWorkbook(File odsFile, ReadingOptions readingOptions) throws XMLStreamException, IOException { - super(odsFile); - } -} diff --git a/src/main/java/de/zedlitz/opendocument/ReadingOptions.java b/src/main/java/de/zedlitz/opendocument/ReadingOptions.java deleted file mode 100644 index 35e11a1..0000000 --- a/src/main/java/de/zedlitz/opendocument/ReadingOptions.java +++ /dev/null @@ -1,20 +0,0 @@ -package de.zedlitz.opendocument; - -public class ReadingOptions { - public static final ReadingOptions DEFAULT_READING_OPTIONS = new ReadingOptions(false, false); - private final boolean withCellFormat; - private final boolean cellInErrorIfParseError; - - public ReadingOptions(boolean withCellFormat, boolean cellInErrorIfParseError) { - this.withCellFormat = withCellFormat; - this.cellInErrorIfParseError = cellInErrorIfParseError; - } - - public boolean isWithCellFormat() { - return this.withCellFormat; - } - - public boolean isCellInErrorIfParseError() { - return this.cellInErrorIfParseError; - } -} diff --git a/src/main/java/de/zedlitz/opendocument/Table.java b/src/main/java/de/zedlitz/opendocument/Table.java index 11f66c9..347a12b 100644 --- 
a/src/main/java/de/zedlitz/opendocument/Table.java +++ b/src/main/java/de/zedlitz/opendocument/Table.java @@ -18,6 +18,7 @@ */ public class Table implements Sheet, Iterable { static final QName ELEMENT_TABLE = new QName(Document.NS_TABLE, "table"); + private static final String ATTRIBUTE_NAME = "name"; private final XMLStreamReader xpp; private String name; @@ -35,8 +36,7 @@ private boolean isTableEndElement(int eventType) { private boolean isRowStartElement(int eventType) { return eventType == XMLStreamConstants.START_ELEMENT - && Row.ELEMENT_ROW.equals(xpp.getName()) - && Document.NS_TABLE.equals(xpp.getNamespaceURI()); + && Row.ELEMENT_ROW.equals(xpp.getName()); } public final Row nextRow() { diff --git a/src/test/java/de/zedlitz/opendocument/CellTest.java b/src/test/java/de/zedlitz/opendocument/CellTest.java index 42417e7..82ade10 100644 --- a/src/test/java/de/zedlitz/opendocument/CellTest.java +++ b/src/test/java/de/zedlitz/opendocument/CellTest.java @@ -17,6 +17,24 @@ public class CellTest extends AbstractBaseTest { private static final String CONTENT_EMPTY_CELL = ""; + /** + * A date cell without a date-value attribute. I don't think this situation can happen in real life. + */ + private static final String CONTENT_MISSING_DATE_VALUE = + ""; + + /** + * A time cell without a time-value attribute. I don't think this situation can happen in real life. + */ + private static final String CONTENT_MISSING_TIME_VALUE = + ""; + + /** + * A boolean cell without a boolean-value attribute. I don't think this situation can happen in real life. 
+ */ + private static final String CONTENT_MISSING_BOOLEAN_VALUE = + ""; + private static final String BROKEN_XML_CONTENT = ""; @@ -131,6 +149,16 @@ public void testGetDateValue() throws Exception { assertEquals("1999-12-31T07:35:02", cells.get(22).getDateValue()); assertEquals("1899-12-30T13:37:46", cells.get(28).getDateValue()); } + + @Test + public void testIsDateTime() throws Exception { + List cells = getCellsFromDemoFile("/formats_german.ods"); + assertFalse(cells.get(0).isDateTime()); + assertFalse(cells.get(11).isDateTime()); + assertTrue(cells.get(22).isDateTime()); + assertTrue(cells.get(28).isDateTime()); + } + @Test public void testAsBoolean() throws Exception { List cells = getCellsFromDemoFile("/formats_french.ods"); @@ -176,4 +204,18 @@ public void testGetValue() throws Exception { assertEquals("0.1295", cells.get(7).getValue()); assertEquals("120.5", cells.get(8).getValue()); } + + @Test + public void invalidCells() throws XMLStreamException { + Cell cell = new Cell(advanceToStartTag(createParser(CONTENT_MISSING_DATE_VALUE)), new DummyRow(), 0); + assertThrows(OdsReaderException.class, cell::asDate); + assertThrows(OdsReaderException.class, cell::asDateTime); + + cell = new Cell(advanceToStartTag(createParser(CONTENT_MISSING_TIME_VALUE)), new DummyRow(), 0); + assertThrows(OdsReaderException.class, cell::asTime); + + cell = new Cell(advanceToStartTag(createParser(CONTENT_MISSING_BOOLEAN_VALUE)), new DummyRow(), 0); + assertThrows(OdsReaderException.class, cell::asBoolean); + + } } diff --git a/src/test/java/de/zedlitz/opendocument/DocumentTest.java b/src/test/java/de/zedlitz/opendocument/DocumentTest.java index 51cf181..2458de2 100644 --- a/src/test/java/de/zedlitz/opendocument/DocumentTest.java +++ b/src/test/java/de/zedlitz/opendocument/DocumentTest.java @@ -53,8 +53,7 @@ public void testEmptyTable() throws Exception { @Test public void testEmptyTableCheckRows() throws Exception { - final Document doc = new Document(this - 
.createParser(CONTENT_ONE_EMPTY_TABLE)); + final Document doc = new Document(this.createParser(CONTENT_ONE_EMPTY_TABLE)); final Table tab = doc.nextTable(); assertNotNull(tab, "one table"); assertNull(tab.nextRow(), "no row"); @@ -63,8 +62,7 @@ public void testEmptyTableCheckRows() throws Exception { @Test public void testSkipRows() throws Exception { - final Document doc = new Document(this - .createParser(CONTENT_ONE_TWO_ROWS)); + final Document doc = new Document(this.createParser(CONTENT_ONE_TWO_ROWS)); final Table tab1 = doc.nextTable(); assertNotNull(tab1, "1st table"); @@ -77,8 +75,7 @@ public void testSkipRows() throws Exception { @Test public void testReadRows() throws Exception { - final Document doc = new Document(this - .createParser(CONTENT_ONE_TWO_ROWS)); + final Document doc = new Document(this .createParser(CONTENT_ONE_TWO_ROWS)); final Table tab1 = doc.nextTable(); assertNotNull(tab1, "1st table"); @@ -213,4 +210,9 @@ public void getSheet_notExisting() throws XMLStreamException, IOException { assertFalse(sheet.isPresent()); } + @Test + public void constructor_emptyZipFile() throws XMLStreamException, IOException { + Document doc = new Document(getClass().getResourceAsStream("/empty.zip")); + } + } diff --git a/src/test/java/de/zedlitz/opendocument/EmptyCellTest.java b/src/test/java/de/zedlitz/opendocument/EmptyCellTest.java new file mode 100644 index 0000000..a471ae8 --- /dev/null +++ b/src/test/java/de/zedlitz/opendocument/EmptyCellTest.java @@ -0,0 +1,25 @@ +package de.zedlitz.opendocument; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class EmptyCellTest { + + private final Cell cell = new EmptyCell(new DummyRow(), 0); + + @Test + void getContent() { + assertEquals("", cell.getContent()); + } + + @Test + void getValueType() { + assertEquals("undefined", cell.getValueType()); + } + + @Test + void getNumberColumnsRepeated() { + assertEquals(0, cell.getNumberColumnsRepeated()); + } +} \ No 
newline at end of file diff --git a/src/test/java/de/zedlitz/opendocument/TableTest.java b/src/test/java/de/zedlitz/opendocument/TableTest.java index adb67eb..46d1761 100644 --- a/src/test/java/de/zedlitz/opendocument/TableTest.java +++ b/src/test/java/de/zedlitz/opendocument/TableTest.java @@ -29,7 +29,7 @@ public class TableTest extends AbstractBaseTest { private static final String BROKEN_XML_CONTENT = ""; - + @Test public void testEmptyTable() throws Exception { final Table table = new Table(advanceToStartTag(createParser(CONTENT_EMPTY_TABLE))); @@ -96,6 +96,5 @@ void openStream() throws XMLStreamException { Stream stream = table.openStream(); List rows = stream.collect(Collectors.toList()); assertEquals(2, rows.size()); - } } diff --git a/src/test/resources/empty.zip b/src/test/resources/empty.zip new file mode 100644 index 0000000..15cb0ec Binary files /dev/null and b/src/test/resources/empty.zip differ