diff --git a/src/intTest/java/com/box/sdk/BoxFileIT.java b/src/intTest/java/com/box/sdk/BoxFileIT.java index b4e675d55..6fa76e409 100644 --- a/src/intTest/java/com/box/sdk/BoxFileIT.java +++ b/src/intTest/java/com/box/sdk/BoxFileIT.java @@ -146,6 +146,27 @@ public void getRepresentationContentSucceeds() throws InterruptedException { } } + @Test + public void getRepresentationContentWithExtractedTextSucceeds() throws InterruptedException { + BoxAPIConnection api = jwtApiForServiceAccount(); + String fileName = "text.pdf"; + BoxFile file = null; + try { + file = uploadSampleFileToUniqueFolder(api, fileName); + final String fileId = file.getID(); + String representationHint = "[extracted_text]"; + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + Retry.retry(() -> { + new BoxFile(api, fileId).getRepresentationContent(representationHint, outputStream); + byte[] downloadedRepresentationContent = outputStream.toByteArray(); + String text = new String(downloadedRepresentationContent, StandardCharsets.UTF_8); + assertTrue(text.contains("Lorem ipsum")); + }, 5, 100); + } finally { + deleteFile(file); + } + } + @Test public void uploadFileStreamSucceeds() { BoxAPIConnection api = jwtApiForServiceAccount(); @@ -156,7 +177,7 @@ public void uploadFileStreamSucceeds() { BoxFile uploadedFile = null; try { - InputStream uploadStream = new ByteArrayInputStream(fileContent); + InputStream uploadStream = new ByteArrayInputStream(fileContent); BoxFile.Info uploadedFileInfo = folder.uploadFile(uploadStream, BoxFileIT.generateString()); uploadedFile = uploadedFileInfo.getResource(); @@ -552,11 +573,11 @@ public void canPaginateOverListOfVersions() { byte[] fileBytes = "Version 2".getBytes(StandardCharsets.UTF_8); uploadedFile.uploadNewVersion( - new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); + new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); fileBytes = "Version 
3".getBytes(StandardCharsets.UTF_8); uploadedFile.uploadNewVersion( - new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); + new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); Collection versionsPart1 = uploadedFile.getVersionsRange(0, 1); assertThat(versionsPart1.size(), is(1)); diff --git a/src/main/java/com/box/sdk/BinaryBodyUtils.java b/src/main/java/com/box/sdk/BinaryBodyUtils.java index 002b3b5a6..108f699a9 100644 --- a/src/main/java/com/box/sdk/BinaryBodyUtils.java +++ b/src/main/java/com/box/sdk/BinaryBodyUtils.java @@ -1,5 +1,6 @@ package com.box.sdk; +import com.box.sdk.http.HttpHeaders; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -73,12 +74,43 @@ static void writeStreamWithContentLength(BoxAPIResponse response, OutputStream o } else { input = response.getBody(); } - writeStreamTo(input, output, response.getContentLength()); + writeStreamTo(input, output, getContentLengthFromAPIResponse(response)); } finally { response.close(); } } + /** + * Get the content length from the API response. + * In some cases, the Content-Length is not provided in the response headers. + * This could happen when getting the content representation for a compressed data. + * In that case the API will switch to chunk mode and provide the length in the "X-Original-Content-Length" header. + * + * @param response API response. + * @return Content length. 
+     */
+    private static long getContentLengthFromAPIResponse(BoxAPIResponse response) {
+        long length = response.getContentLength();
+        if (length == -1) { // -1 is treated here as "length not reported"; fall back to the raw headers
+            String headerValue = null;
+            if (response.getHeaders().containsKey(HttpHeaders.CONTENT_LENGTH)) {
+                headerValue = response.getHeaders().get(HttpHeaders.CONTENT_LENGTH).get(0);
+            } else if (response.getHeaders().containsKey(HttpHeaders.X_ORIGINAL_CONTENT_LENGTH)) {
+                headerValue = response.getHeaders().get(HttpHeaders.X_ORIGINAL_CONTENT_LENGTH).get(0);
+            }
+
+            if (headerValue != null) {
+                try {
+                    length = Long.parseLong(headerValue); // long, not int: lengths above 2 GiB are valid
+                } catch (NumberFormatException e) {
+                    throw new RuntimeException("Invalid content length: " + headerValue, e); // keep the cause
+                }
+            }
+        }
+
+        return length;
+    }
+
     /**
      * Writes content of input stream to provided output.
      *
@@ -126,8 +158,8 @@ static void writeStreamTo(InputStream input, OutputStream output, long expectedL
                 totalBytesRead += n; // Track the total bytes read
             }
             if (totalBytesRead != expectedLength) {
-                throw new IOException("Stream ended prematurely. Expected " + expectedLength
-                    + " bytes, but read " + totalBytesRead + " bytes.");
+                throw new IOException("Stream ended prematurely. Expected "
+                    + expectedLength + " bytes, but read " + totalBytesRead + " bytes.");
             }
         } catch (IOException e) {
             throw new RuntimeException("Error during streaming: " + e.getMessage(), e);
diff --git a/src/main/java/com/box/sdk/http/HttpHeaders.java b/src/main/java/com/box/sdk/http/HttpHeaders.java
index 6def10f46..945b3d406 100644
--- a/src/main/java/com/box/sdk/http/HttpHeaders.java
+++ b/src/main/java/com/box/sdk/http/HttpHeaders.java
@@ -10,6 +10,11 @@ public final class HttpHeaders {
      */
     public static final String CONTENT_LENGTH = "Content-Length";
 
+    /**
+     * HTTP header key X-Original-Content-Length.
+     */
+    public static final String X_ORIGINAL_CONTENT_LENGTH = "X-Original-Content-Length";
+
     /**
      * HTTP header key Content-Type.
*/ diff --git a/src/test/resources/sample-files/text.pdf b/src/test/resources/sample-files/text.pdf new file mode 100644 index 000000000..fec405395 Binary files /dev/null and b/src/test/resources/sample-files/text.pdf differ