From 3b82c83414f97c835c318ac328783ae07c3c42e9 Mon Sep 17 00:00:00 2001 From: lopez Date: Sun, 13 Nov 2022 22:13:19 +0100 Subject: [PATCH] fix possible missing start token position in block --- .../src/main/java/org/grobid/core/sax/PDFALTOSaxHandler.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/grobid-core/src/main/java/org/grobid/core/sax/PDFALTOSaxHandler.java b/grobid-core/src/main/java/org/grobid/core/sax/PDFALTOSaxHandler.java index f5be3a8e7f..11f4201146 100755 --- a/grobid-core/src/main/java/org/grobid/core/sax/PDFALTOSaxHandler.java +++ b/grobid-core/src/main/java/org/grobid/core/sax/PDFALTOSaxHandler.java @@ -260,6 +260,7 @@ else if (qName.equals("TextBlock")) { //page.addBlock(block); } block = new Block(); + block.setStartToken(tokenizations.size()); //block.setPage(currentPage); blabla = new StringBuffer(); int imagePos = images.size()-1; @@ -393,6 +394,7 @@ public void startElement(String namespaceURI, String localName, block = new Block(); blabla = new StringBuffer(); nbTokens = 0; + block.setStartToken(tokenizations.size()); //block.setPage(currentPage); // blabla.append("\n@block\n"); } else if (qName.equals("Illustration")) {