-
Notifications
You must be signed in to change notification settings - Fork 0
/
core.py
129 lines (105 loc) · 3.49 KB
/
core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import json
import os
import threading
from typing import Optional
from PIL import ImageQt
import pytesseract
import shutil
import webbrowser
import urllib.parse
import time
from PySide6.QtGui import QClipboard
from PySide6.QtWidgets import QApplication
from pyclip import copy as copy_to_clipboard
# Cached handle to the application clipboard. It stays None until
# get_clipboardimg() first obtains it from QApplication.clipboard().
CLIPBOARD: Optional[QClipboard] = None
def get_clipboardimg():
    """Return the image currently held by the clipboard, or ``None``.

    The module-level ``CLIPBOARD`` handle is requested from ``QApplication``
    the first time this function runs and is reused by later calls. A truthy
    clipboard image is converted with ``ImageQt.fromqimage``; a falsy one
    produces ``None``.
    """
    global CLIPBOARD
    if CLIPBOARD is None:
        CLIPBOARD = QApplication.clipboard()
    qimage = CLIPBOARD.image()
    return ImageQt.fromqimage(qimage) if qimage else None
def find_tesseract():
    """Locate a tesseract executable.

    A bundled copy is looked for first: a regular, executable file whose name
    starts with "tesseract" (case-insensitive) inside a ``tesseract``
    directory relative to the current working directory. Entries are examined
    in sorted order so the same file is chosen on every run. When no such
    file exists, the search falls back to ``shutil.which("tesseract")``.

    Returns:
        The path of the executable (relative for a bundled copy), or ``None``
        when neither lookup finds one.
    """
    local_dir = "tesseract"
    if os.path.isdir(local_dir):
        for entry in sorted(os.listdir(local_dir)):
            if not entry.lower().startswith("tesseract"):
                continue
            candidate = os.path.join(local_dir, entry)
            # Directories normally pass the X_OK check as well, so insist on
            # a regular file before treating the entry as the binary.
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                print("Found local binary of tesseract library")
                return candidate
    return shutil.which("tesseract")
class JSONSettings(dict):
    """Dictionary of application settings backed by a JSON file.

    The instance is a plain ``dict``; ``save()`` writes its contents to the
    file given at construction time and ``load()`` replaces its contents with
    what the file holds.
    """

    def __init__(self, filename):
        """Load settings from *filename*, or create it with defaults.

        Args:
            filename: Path of the JSON settings file. If it does not exist,
                the default settings are stored in the instance and written
                to that path immediately.
        """
        self._filename = filename
        if os.path.exists(self._filename):
            self.load()
        else:
            super().__init__({
                "langs": [],
                "engine": 0,
                "copy": True,
                "webSearch": False
            })
            self.save()

    def save(self):
        """Write the current settings to the settings file as UTF-8 JSON."""
        with open(self._filename, "w", encoding="utf-8") as fobj:
            json.dump(self, fobj, ensure_ascii=False)

    def load(self):
        """Replace the in-memory settings with the contents of the file.

        The file is parsed completely before the mapping is touched, so a
        read or parse error leaves the current settings untouched. Keys that
        are present in memory but absent from the file are dropped, so the
        instance mirrors the file after a reload.
        """
        with open(self._filename, "r", encoding="utf-8") as fobj:
            stored = dict(json.load(fobj))
        self.clear()
        self.update(stored)
class ClipboardImageListener(threading.Thread):
    """Thread that polls the clipboard and hands new images to a handler."""

    def __init__(self, handler):
        """Prepare the listener; the thread runs ``loop`` once started.

        Args:
            handler: Called with the image each time the clipboard image
                differs from the previously seen one.
        """
        super().__init__(target=self.loop)
        self._handler = handler
        self.__alive = True
        # The image present at construction time is the baseline, so it is
        # not reported to the handler.
        self._tmpimg = get_clipboardimg()

    def destroy(self):
        """Ask the polling loop to finish; it exits at its next check."""
        self.__alive = False

    def loop(self) -> None:
        """Poll the clipboard about once per second until ``destroy`` is called."""
        while self.__alive:
            try:
                time.sleep(1)
                current = get_clipboardimg()
            except OSError:
                # Reading the clipboard failed; wait briefly and poll again.
                time.sleep(0.5)
                continue
            if current is None:
                continue
            if current != self._tmpimg:
                self._tmpimg = current
                self._handler(current)

    @property
    def handler(self):
        """The callable that receives new clipboard images."""
        return self._handler

    @handler.setter
    def handler(self, value):
        # Non-callable values are ignored and the previous handler is kept.
        if not callable(value):
            return
        self._handler = value
class TesseractEngine:
    """Runs OCR through pytesseract and post-processes the recognised text.

    Attributes are plain instance fields: ``selected_langs`` is the list of
    language codes used for recognition and ``records`` collects the texts
    recognised with ``store=True``.
    """

    # Search URL templates selectable through ``webengine_index``; ``{}`` is
    # replaced with the URL-quoted recognised text.
    WEBENGINE_TEMPLATES = [
        "https://yandex.ru/search/?lr=38&text={}",
        "https://www.google.com/search?q={}"
    ]

    def __init__(self, path: str):
        """Configure pytesseract to use the binary at *path*.

        Args:
            path: Path of the tesseract executable.
        """
        self._tesseract_path = path
        # Point pytesseract at the chosen binary before asking it for the
        # language list; otherwise the list is queried from the default
        # command, which fails when only the binary at `path` is available.
        if path:
            pytesseract.pytesseract.tesseract_cmd = path
        self._langs = sorted(pytesseract.get_languages())
        self.selected_langs = []
        self.records = []
        pytesseract.pytesseract.tesseract_cmd = path

    @property
    def langs(self):
        """Available language codes as an immutable, sorted tuple."""
        return tuple(self._langs)

    def img2tesseract(self, img,
                      web_search=False, clipboard_copy=True, store=True, webengine_index=0):
        """Recognise the text in *img* and optionally search, copy and store it.

        Args:
            img: Image passed to ``pytesseract.image_to_string``.
            web_search: Open a browser search for the recognised text.
            clipboard_copy: Copy the recognised text to the clipboard.
            store: Append the recognised text to ``records``.
            webengine_index: Index into ``WEBENGINE_TEMPLATES`` used when
                ``web_search`` is true.

        Returns:
            The recognised text with surrounding whitespace stripped.

        Raises:
            IndexError: If ``web_search`` is true and ``webengine_index`` is
                outside ``WEBENGINE_TEMPLATES``.
        """
        # With no language selected pass None rather than "", so that no
        # empty language argument is forwarded to tesseract.
        lang = "+".join(self.selected_langs) or None
        text = pytesseract.image_to_string(img, lang=lang).strip()
        if web_search:
            safe_string = urllib.parse.quote_plus(text)
            url = self.WEBENGINE_TEMPLATES[webengine_index].format(safe_string)
            webbrowser.open(url)
        if clipboard_copy:
            copy_to_clipboard(text)
        if store:
            self.records.append(text)
        print(f"Copied text: {text}")
        return text