Skip to content

Commit

Permalink
Make jpype optional
Browse files Browse the repository at this point in the history
  • Loading branch information
chezou committed Nov 19, 2023
1 parent dd9d111 commit d4e82da
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 26 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ Ensure you have a Java runtime and set the PATH for it.
pip install tabula-py
```

If you want faster execution via JPype, install tabula-py with the `jpype` extra.

```sh
pip install tabula-py[jpype]
```

### Example

tabula-py enables you to extract tables from a PDF into a DataFrame, or a JSON. It can also extract tables from a PDF and save the file as a CSV, a TSV, or a JSON.  
Expand Down
6 changes: 6 additions & 0 deletions docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ You can install tabula-py from PyPI with ``pip`` command.
pip install tabula-py
If you want faster execution via JPype, install tabula-py with the ``jpype`` extra.

.. code-block:: bash
pip install tabula-py[jpype]
.. Note::
conda recipe on conda-forge is not maintained by us.
We recommend installing via ``pip`` to use the latest version of tabula-py.
Expand Down
24 changes: 23 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,30 @@ def lint(session):


@nox.session
@nox.parametrize(
    "python,jpype",
    [
        ("3.8", True),
        ("3.9", True),
        ("3.10", True),
        ("3.11", True),
        # NOTE(review): the 3.12 run without jpype is disabled here;
        # presumably it is waiting on interpreter/dependency support.
        # Confirm before re-enabling.
        # ("3.12", False),
    ],
)
def tests(session, jpype):
    """Run the test suite for one (Python version, jpype flag) combination.

    Args:
        session: The nox session used to install packages and run commands.
        jpype: When true, run the suite that installs the ``jpype`` extra;
            otherwise run the suite that installs only the ``test`` extra.
    """
    if jpype:
        tests_with_jpype(session)
    else:
        tests_without_jpype(session)


def tests_without_jpype(session):
    """Install the package with only the ``test`` extra and run the table tests.

    Args:
        session: The nox session used to install packages and run commands.
    """
    pytest_command = ("pytest", "-v", "tests/test_read_pdf_table.py")
    session.install(".[test]")
    session.run(*pytest_command)


def tests_with_jpype(session):
    """Install the package with the ``jpype`` and ``test`` extras and run the tests.

    Each test module is run in its own pytest invocation, in the order listed.

    Args:
        session: The nox session used to install packages and run commands.
    """
    session.install(".[jpype,test]")
    test_modules = (
        "tests/test_read_pdf_table.py",
        "tests/test_read_pdf_jar_path.py",
        "tests/test_read_pdf_silent.py",
    )
    for module_path in test_modules:
        session.run("pytest", "-v", module_path)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ dependencies = [
"pandas >= 0.25.3",
"numpy",
"distro",
"jpype1",
]
dynamic = ["version"]

[project.optional-dependencies]
jpype = ["jpype1"]
dev = [
"pytest",
"flake8",
Expand Down
36 changes: 18 additions & 18 deletions tabula/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
from logging import getLogger
from typing import List, Optional

import jpype
import jpype.imports

from .errors import JavaNotFoundError
from .util import TabulaOption

Expand All @@ -27,35 +24,38 @@ def jar_path() -> str:

class TabulaVm:
def __init__(self, java_options: List[str], silent: Optional[bool]) -> None:
if not jpype.isJVMStarted():
jpype.addClassPath(jar_path())

# Workaround to enforce the silent option. See:
# https://github.com/tabulapdf/tabula-java/issues/231#issuecomment-397281157
if silent:
java_options.extend(
(
"-Dorg.slf4j.simpleLogger.defaultLogLevel=off",
"-Dorg.apache.commons.logging.Log"
"=org.apache.commons.logging.impl.NoOpLog",
try:
import jpype
import jpype.imports

if not jpype.isJVMStarted():
jpype.addClassPath(jar_path())

# Workaround to enforce the silent option. See:
# https://github.com/tabulapdf/tabula-java/issues/231#issuecomment-397281157
if silent:
java_options.extend(
(
"-Dorg.slf4j.simpleLogger.defaultLogLevel=off",
"-Dorg.apache.commons.logging.Log"
"=org.apache.commons.logging.impl.NoOpLog",
)
)
)

jpype.startJVM(*java_options, convertStrings=False)
jpype.startJVM(*java_options, convertStrings=False)

try:
import java.lang as lang
import technology.tabula as tabula
from org.apache.commons.cli import DefaultParser

self.tabula = tabula
self.parser = DefaultParser()
self.lang = lang

except (ModuleNotFoundError, ImportError) as e:
logger.warning(
"Error importing jpype dependencies. Fallback to subprocess."
)
logger.warning(jpype.java.lang.System.getProperty("java.class.path"))
logger.warning(e)
self.tabula = None
self.parse = None
Expand Down
3 changes: 2 additions & 1 deletion tests/test_read_pdf_jar_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from subprocess import CalledProcessError
from unittest.mock import patch

import jpype
import pytest

import tabula
Expand All @@ -19,5 +20,5 @@ def test_read_pdf_with_jar_path(self, jar_func):
# Fallback to subprocess
with pytest.raises(CalledProcessError):
tabula.read_pdf(self.pdf_path, encoding="utf-8")
file_name = Path(tabula.backend.jpype.getClassPath()).name
file_name = Path(jpype.getClassPath()).name
self.assertEqual(file_name, "tabula-java.jar")
7 changes: 2 additions & 5 deletions tests/test_read_pdf_silent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,16 @@
import unittest
from unittest.mock import patch

import pytest

import tabula


class TestReadPdfJarPath(unittest.TestCase):
def setUp(self):
self.pdf_path = "tests/resources/data.pdf"

@patch("tabula.backend.jpype.startJVM")
@patch("jpype.startJVM")
def test_read_pdf_with_silent_true(self, jvm_func):
with pytest.raises(RuntimeError):
tabula.read_pdf(self.pdf_path, encoding="utf-8", silent=True)
tabula.read_pdf(self.pdf_path, encoding="utf-8", silent=True)

target_args = []
if platform.system() == "Darwin":
Expand Down

0 comments on commit d4e82da

Please sign in to comment.