forked from Byaidu/PDFMathTranslate
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'Byaidu:main' into main
- Loading branch information
Showing
10 changed files
with
280 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Runs the Black action against the repository on every push and pull request.
name: Format Code with Black

on:
  - push
  - pull_request

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      # Fetch the repository contents so Black has files to inspect.
      - uses: actions/checkout@v4
      # Run the Black action from its stable tag.
      - uses: psf/black@stable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,13 +22,16 @@ params = { | |
'lang_out': 'zh', | ||
'service': 'google', | ||
'thread': 4, | ||
} | ||
} | ||
``` | ||
Translate with files: | ||
```python | ||
(file_mono, file_dual) = translate(files=['example.pdf'], **params)[0] | ||
|
||
``` | ||
Translate with stream: | ||
```python | ||
with open('example.pdf', 'rb') as f: | ||
(stream_mono, stream_dual) = translate_stream(stream=f.read(), | ||
**params) | ||
|
||
(stream_mono, stream_dual) = translate_stream(stream=f.read(), **params) | ||
``` | ||
|
||
[⬆️ Back to top](#toc) | ||
|
@@ -39,7 +42,7 @@ with open('example.pdf', 'rb') as f: | |
|
||
For more flexibility, you can communicate with the program over HTTP: | ||
|
||
1. You have the backend installed & running | ||
1. Install and run backend | ||
|
||
```bash | ||
pip install pdf2zh[backend] | ||
|
@@ -49,7 +52,7 @@ In a more flexible way, you can communicate with the program using HTTP protocol | |
|
||
2. Using HTTP protocols as follows: | ||
|
||
- Translate | ||
- Submit translate task | ||
|
||
```bash | ||
curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F "data={\"lang_in\":\"en\",\"lang_out\":\"zh\",\"service\":\"google\",\"thread\":4}" | ||
|
@@ -70,19 +73,19 @@ In a more flexible way, you can communicate with the program using HTTP protocol | |
{"state":"SUCCESS"} | ||
``` | ||
|
||
- Specifying output | ||
- Save monolingual file | ||
|
||
```bash | ||
curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/mono --output example-mono.pdf | ||
``` | ||
|
||
- Specifying the output as a bilingual file | ||
- Save bilingual file | ||
|
||
```bash | ||
curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/dual --output example-dual.pdf | ||
``` | ||
|
||
- Or delete it after the whole process | ||
- Interrupt if running and delete the task | ||
```bash | ||
curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a -X DELETE | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
import unittest | ||
from unittest.mock import Mock, patch, MagicMock | ||
from pdfminer.layout import LTPage, LTChar, LTLine | ||
from pdfminer.pdfinterp import PDFResourceManager | ||
from pdf2zh.converter import PDFConverterEx, TranslateConverter | ||
|
||
|
||
class TestPDFConverterEx(unittest.TestCase):
    """Unit tests for the page and character hooks of ``PDFConverterEx``."""

    def setUp(self):
        """Build a converter bound to a fresh resource manager."""
        self.rsrcmgr = PDFResourceManager()
        self.converter = PDFConverterEx(self.rsrcmgr)

    def test_begin_page(self):
        """``begin_page`` must populate ``cur_item`` with the page's id."""
        page = Mock(pageno=1, cropbox=(0, 0, 100, 200))
        ctm = [1, 0, 0, 1, 0, 0]

        self.converter.begin_page(page, ctm)

        current_item = self.converter.cur_item
        self.assertIsNotNone(current_item)
        self.assertEqual(current_item.pageid, 1)

    def test_render_char(self):
        """``render_char`` must return the expected text width for the glyph."""
        # The font is fully mocked; only the three methods configured here
        # are given concrete return values.
        font = Mock(
            **{
                "to_unichr.return_value": "A",
                "char_width.return_value": 10,
                "char_disp.return_value": (0, 0),
            }
        )
        graphic_state = Mock()
        self.converter.cur_item = Mock()

        text_width = self.converter.render_char(
            (1, 2, 3, 4, 5, 6),
            font,
            fontsize=12,
            scaling=1.0,
            rise=0,
            cid=65,
            ncs=None,
            graphicstate=graphic_state,
        )

        # Expected text width
        self.assertEqual(text_width, 120.0)
|
||
|
||
class TestTranslateConverter(unittest.TestCase):
    """Unit tests for ``TranslateConverter`` construction and layout handling."""

    def setUp(self):
        """Create an en -> zh converter that uses the ``google`` service."""
        self.rsrcmgr = PDFResourceManager()
        self.layout = {1: Mock()}
        self.translator_class = Mock()
        converter_options = {
            "layout": self.layout,
            "lang_in": "en",
            "lang_out": "zh",
            "service": "google",
        }
        self.converter = TranslateConverter(self.rsrcmgr, **converter_options)

    def test_translator_initialization(self):
        """The converter must expose a translator with the expected languages."""
        translator = self.converter.translator
        self.assertIsNotNone(translator)
        self.assertEqual(translator.lang_in, "en")
        self.assertEqual(translator.lang_out, "zh-CN")

    @patch("pdf2zh.converter.TranslateConverter.receive_layout")
    def test_receive_layout(self, mock_receive_layout):
        """The patched ``receive_layout`` must be called once with the page.

        NOTE: ``receive_layout`` itself is replaced by a mock here, so this
        only checks that the call reaches the patched attribute.
        """
        font = Mock(**{"fontname.return_value": "mock_font"})
        char = LTChar(
            matrix=(1, 2, 3, 4, 5, 6),
            font=font,
            fontsize=12,
            scaling=1.0,
            rise=0,
            text="A",
            textwidth=10,
            textdisp=(1.0, 1.0),
            ncs=Mock(),
            graphicstate=Mock(),
        )
        page = LTPage(1, (0, 0, 100, 200))
        page.add(char)

        self.converter.receive_layout(page)

        mock_receive_layout.assert_called_once_with(page)

    @patch("concurrent.futures.ThreadPoolExecutor")
    @patch("pdf2zh.cache")
    def test_translation(self, mock_cache, mock_executor):
        """A patched translator must map "Hello" and pass placeholders through.

        NOTE: the translator is replaced by a mock inside this test, so the
        assertion covers the mock's side effect rather than a real service.
        """
        pool = mock_executor.return_value.__enter__.return_value
        pool.map.return_value = ["你好", "{v1}"]
        mock_cache.configure_mock(
            **{
                "deterministic_hash.return_value": "test_hash",
                "load_paragraph.return_value": None,
                "write_paragraph.return_value": None,
            }
        )

        def fake_translate(text):
            # Translate only the literal "Hello"; echo anything else back.
            if text == "Hello":
                return "你好"
            return text

        segments = ["Hello", "{v1}"]
        self.converter.thread = 2
        with patch.object(self.converter, "translator") as mock_translator:
            mock_translator.translate.side_effect = fake_translate
            translated = [
                self.converter.translator.translate(segment)
                for segment in segments
            ]
        self.assertEqual(translated, ["你好", "{v1}"])

    def test_receive_layout_with_complex_formula(self):
        """``receive_layout`` must return a non-``None`` result for a mixed page."""
        page = LTPage(1, (0, 0, 500, 500))
        char = Mock(**{"fontname.return_value": "mock_font"})
        line = LTLine(0.1, (0, 0), (10, 20))
        for item in (char, line):
            page.add(item)

        # Layout stub: reports a 100x100 shape and answers -1 for any index.
        page_layout = MagicMock(shape=(100, 100))
        page_layout.__getitem__.return_value = -1
        self.converter.layout = [None, page_layout]
        self.converter.thread = 1

        self.assertIsNotNone(self.converter.receive_layout(page))

    def test_invalid_translation_service(self):
        """Constructing a converter with an unknown service must raise ``ValueError``."""
        self.assertRaises(
            ValueError,
            TranslateConverter,
            self.rsrcmgr,
            layout=self.layout,
            lang_in="en",
            lang_out="zh",
            service="InvalidService",
        )
|
||
|
||
# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Oops, something went wrong.