diff --git a/.github/workflows/test-kotlin-samples.yml b/.github/workflows/test-kotlin-samples.yml
index 2e72b8e..e8f4431 100644
--- a/.github/workflows/test-kotlin-samples.yml
+++ b/.github/workflows/test-kotlin-samples.yml
@@ -17,7 +17,7 @@ jobs:
strategy:
matrix:
os: [windows-latest, ubuntu-latest]
- dir: ['TextExtract', 'FlattenTransparency', 'SplitPDF', 'ConvertToOffice', 'MergePDF', 'ListWords', 'PDFOptimize', 'PDFAConverter', 'Redactions', 'Watermark', 'RegexTextSearch']
+ dir: ['TextExtract', 'FlattenTransparency', 'SplitPDF', 'ConvertToOffice', 'MergePDF', 'ListWords', 'PDFOptimize', 'PDFAConverter', 'Redactions', 'Watermark', 'RegexTextSearch', 'RegexExtractText']
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -62,3 +62,4 @@ jobs:
${{matrix.dir}}/*.docx
${{matrix.dir}}/*.xlsx
${{matrix.dir}}/*.pptx
+ ${{matrix.dir}}/*.json
diff --git a/RegexExtractText/.idea/runConfigurations/RegexExtractText.xml b/RegexExtractText/.idea/runConfigurations/RegexExtractText.xml
new file mode 100644
index 0000000..b758f36
--- /dev/null
+++ b/RegexExtractText/.idea/runConfigurations/RegexExtractText.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/RegexExtractText/pom.xml b/RegexExtractText/pom.xml
new file mode 100644
index 0000000..525c870
--- /dev/null
+++ b/RegexExtractText/pom.xml
@@ -0,0 +1,236 @@
+
+
+ 4.0.0
+ com.datalogics.pdfl.samples
+ RegexExtractText
+ 1.0-SNAPSHOT
+
+
+
+ mavenCentral
+ https://repo1.maven.org/maven2/
+
+
+
+
+ UTF-8
+ official
+ 1.8
+ 1.8
+ 1.8
+
+
+
+
+ Windows64
+
+
+ windows
+ amd64
+
+
+
+ win-x86-64-jni
+
+
+
+ MacArm
+
+
+ mac
+ aarch64
+
+
+
+ mac-arm-64-jni
+
+
+
+ Linux64
+
+
+
+ Linux
+ amd64
+
+
+
+ linux-x86-64-jni
+
+
+
+
+
+
+ org.jetbrains.kotlin
+ kotlin-stdlib-jdk8
+ 1.9.21
+
+
+ com.datalogics.pdfl
+ pdfl
+ 18.31.0
+ pom
+
+
+ com.datalogics.pdfl
+ pdfl
+ 18.31.0
+
+
+ com.datalogics.pdfl
+ pdfl
+ 18.31.0
+ zip
+ ${jni.classifier}
+
+
+ com.datalogics.pdfl
+ pdfl
+ 18.31.0
+ zip
+ resources
+
+
+ com.datalogics.pdfl
+ pdfl
+ 18.31.0
+ javadoc
+
+
+ org.json
+ json
+ [20230227,)
+
+
+
+
+ src/main/kotlin
+
+
+ org.jetbrains.kotlin
+ kotlin-maven-plugin
+ 1.9.21
+
+
+ compile
+ compile
+
+ compile
+
+
+
+
+
+ maven-surefire-plugin
+ 2.22.2
+
+
+ maven-failsafe-plugin
+ 2.22.2
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+ 1.6.0
+
+ RegexExtractText
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ unpack-resources
+ generate-resources
+
+ unpack
+
+
+
+
+ com.datalogics.pdfl
+ pdfl
+ resources
+ zip
+ ${project.build.directory}/lib/Resources
+
+
+
+
+
+ unpack-jni
+ generate-resources
+
+ unpack
+
+
+
+
+ com.datalogics.pdfl
+ pdfl
+ ${jni.classifier}
+ zip
+ ${project.build.directory}/lib
+
+
+
+
+
+ unpack-license
+ generate-resources
+
+ unpack
+
+
+
+
+ com.datalogics.pdfl
+ pdfl
+ license
+ zip
+ ${project.build.directory}/lib
+
+
+
+
+
+
+
+ maven-assembly-plugin
+
+
+ package
+
+ single
+
+
+
+
+
+
+ true
+ com.datalogics.pdfl.samples.RegexExtractTextKt
+
+
+
+ jar-with-dependencies
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+ 3.0.2
+
+
+
+
+
+
\ No newline at end of file
diff --git a/RegexExtractText/src/main/kotlin/com/datalogics/pdfl/samples/RegexExtractText.kt b/RegexExtractText/src/main/kotlin/com/datalogics/pdfl/samples/RegexExtractText.kt
new file mode 100644
index 0000000..eee2b79
--- /dev/null
+++ b/RegexExtractText/src/main/kotlin/com/datalogics/pdfl/samples/RegexExtractText.kt
@@ -0,0 +1,139 @@
+package com.datalogics.pdfl.samples
+
+import com.datalogics.PDFL.*
+import org.json.JSONArray
+import org.json.JSONObject
+import java.io.FileWriter
+
+/*
+ *
+ * This sample demonstrates using DocTextFinder to find every phrase in a PDF that
+ * matches a regular expression (set in sRegex; phone numbers by default). The output
+ * is a JSON file that lists each match with its page number and quad coordinates.
+ *
+ * Copyright (c) 2024, Datalogics, Inc. All rights reserved.
+ *
+ */
+
+// This Datalogics sample uses the org.json (JSON-Java) library to generate JSON output. Below is the JSON license for the org.json software:
+/*
+Copyright (c) 2002 JSON.org
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+The Software shall be used for Good, not Evil.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
+*/
+
+/**
+ * Runs the RegexExtractText sample.
+ *
+ * Searches a PDF for phrases that match a regular expression and writes each
+ * match, with the page number and corner coordinates of every quad it covers,
+ * to RegexExtractText-out.json in the working directory.
+ *
+ * @param args optional command line; args[0], when present, is the input PDF
+ * path. Otherwise the sample PDF under the Library resource directory is read.
+ */
+fun main(args: Array<String>) {
+    println("RegexExtractText sample:")
+
+    val lib = Library()
+
+    try {
+        val sInput =
+            if (args.isNotEmpty()) {
+                args[0]
+            } else {
+                Library.getResourceDirectory() + "Sample_Input/RegexExtractText.pdf"
+            }
+
+        val sOutput = "RegexExtractText-out.json"
+
+        // Phone numbers
+        val sRegex = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})"
+        // Email addresses
+        //val sRegex = "(\\b[\\w.!#$%&'*+\\/=?^`{|}~-]+@[\\w-]+(?:\\.[\\w-]+)*\\b)"
+        // URLs
+        //val sRegex = "((https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,}))"
+
+        println("Reading $sInput")
+
+        val doc = Document(sInput)
+        // Close the document even when matching or writing fails, and always
+        // before the outer finally block releases the Library.
+        try {
+            // This array will hold the JSON stream that we will print to the output JSON file.
+            val result = JSONArray()
+
+            val nPages = doc.numPages
+
+            println("Opened document $sInput")
+
+            val wordConfig = WordFinderConfig()
+
+            // Need to set this to true so phrases will be concatenated properly.
+            wordConfig.noHyphenDetection = true
+
+            val docTextFinder = DocTextFinder(doc, wordConfig)
+
+            val docMatches = docTextFinder.getMatchList(0, nPages - 1, sRegex)
+
+            for (wInfo in docMatches) {
+                // This JSON object will store the match phrase and an array of quads for the match.
+                val matchObject = JSONObject()
+
+                // This JSON array will store the page number and quad location for each match quad.
+                val matchQuadInformation = JSONArray()
+
+                // Set the match phrase in the JSON object.
+                matchObject.put("match-phrase", wInfo.matchString)
+
+                // Get the word quads.
+                val quadList = wInfo.quadInfo
+
+                // Iterate through the quad info.
+                for (qInfo in quadList) {
+                    for (quad in qInfo.quads) {
+                        // Get the coordinates of the quad and set the quad coordinates in JSON objects.
+                        val topLeft = JSONObject().put("x", quad.topLeft.h).put("y", quad.topLeft.v)
+                        val bottomLeft = JSONObject().put("x", quad.bottomLeft.h).put("y", quad.bottomLeft.v)
+                        val topRight = JSONObject().put("x", quad.topRight.h).put("y", quad.topRight.v)
+                        val bottomRight = JSONObject().put("x", quad.bottomRight.h).put("y", quad.bottomRight.v)
+
+                        // Use the quad coordinate JSON objects to form a single JSON object that holds match quad location information.
+                        val quadLocation = JSONObject()
+                            .put("bottom-left", bottomLeft)
+                            .put("bottom-right", bottomRight)
+                            .put("top-left", topLeft)
+                            .put("top-right", topRight)
+
+                        val quadInformationObject = JSONObject()
+                            .put("page-number", qInfo.pageNum)
+                            .put("quad-location", quadLocation)
+
+                        // Insert the match's page number and quad location(s) in the matchQuadInformation JSON array.
+                        matchQuadInformation.put(quadInformationObject)
+                    }
+                }
+
+                // Set the match's quad information in the matchObject.
+                matchObject.put("match-quads", matchQuadInformation)
+
+                result.put(matchObject)
+            }
+
+            // Write the match information to the output JSON file.
+            println("Writing JSON to $sOutput")
+            FileWriter(sOutput).use { file -> file.write(result.toString(4)) }
+        } finally {
+            doc.close()
+        }
+    } finally {
+        lib.delete()
+    }
+}