Skip to content

Commit

Permalink
feat: bing
Browse files Browse the repository at this point in the history
  • Loading branch information
Byaidu committed Dec 6, 2024
1 parent 5612c41 commit 6519ce5
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 196 deletions.
21 changes: 5 additions & 16 deletions pdf2zh/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,23 +136,12 @@ def __init__(
self.noto = noto
self.translator: BaseTranslator = None
param = service.split(":", 1)
service_id = param[0]
service_name = param[0]
service_model = param[1] if len(param) > 1 else None
if service_id == "google":
self.translator = GoogleTranslator(service, lang_out, lang_in, None)
elif service_id == "deepl":
self.translator = DeepLTranslator(service, lang_out, lang_in, None)
elif service_id == "deeplx":
self.translator = DeepLXTranslator(service, lang_out, lang_in, None)
elif service_id == "ollama":
self.translator = OllamaTranslator(service, lang_out, lang_in, service_model)
elif service_id == "openai":
self.translator = OpenAITranslator(service, lang_out, lang_in, service_model)
elif service_id == "azure":
self.translator = AzureTranslator(service, lang_out, lang_in, None)
elif service_id == "tencent":
self.translator = TencentTranslator(service, lang_out, lang_in, None)
else:
for translator in [GoogleTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, OpenAITranslator, AzureTranslator, TencentTranslator]:
if service_name == translator.name:
self.translator = translator(service, lang_out, lang_in, service_model)
if not self.translator:
raise ValueError("Unsupported translation service")

def receive_layout(self, ltpage: LTPage):
Expand Down
221 changes: 63 additions & 158 deletions pdf2zh/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@
from pathlib import Path
from pdf2zh import __version__
from pdf2zh.pdf2zh import extract_text
from pdf2zh.translator import (
BaseTranslator,
GoogleTranslator,
DeepLTranslator,
DeepLXTranslator,
OllamaTranslator,
OpenAITranslator,
AzureTranslator,
TencentTranslator,
)

import gradio as gr
import numpy as np
Expand All @@ -13,106 +23,14 @@

# Map service names to pdf2zh service options
# five value, padding with None
service_map = {
"Google": (None, None, None),
"DeepL": ("DEEPL_SERVER_URL", "DEEPL_AUTH_KEY", None),
"DeepLX": ("DEEPLX_SERVER_URL", "DEEPLX_AUTH_KEY", None),
"Ollama": ("OLLAMA_HOST", None, None),
"OpenAI": ("OPENAI_BASE_URL", None, "OPENAI_API_KEY"),
"Azure": ("AZURE_APIKEY", "AZURE_ENDPOINT", "AZURE_REGION"),
"Tencent": ("TENCENT_SECRET_KEY", "TENCENT_SECRET_ID", None),
}
service_config = {
"Google": {
"apikey_content": {"visible": False},
"apikey2_visibility": {"visible": False},
"model_visibility": {"visible": False},
"apikey3_visibility": {"visible": False},
},
"DeepL": {
"apikey_content": lambda s: {
"visible": True,
"value": os.environ.get(s[0]),
"label": s[0],
},
"apikey2_visibility": lambda s: {
"visible": True,
"value": os.environ.get(s[1]),
"label": s[1],
},
"model_visibility": {"visible": False},
"apikey3_visibility": {"visible": False},
},
"DeepLX": {
"apikey_content": lambda s: {
"visible": True,
"value": os.environ.get(s[0]),
"label": s[0],
},
"apikey2_visibility": lambda s: {
"visible": True,
"value": os.environ.get(s[1]),
"label": s[1],
},
"model_visibility": {"visible": False},
"apikey3_visibility": {"visible": False},
},
"Ollama": {
"apikey_content": lambda s: {
"visible": True,
"value": os.environ.get(s[0]),
"label": s[0],
},
"apikey2_visibility": {"visible": False},
"model_visibility": lambda s: {"visible": True, "value": s[1]},
"apikey3_visibility": {"visible": False},
},
"OpenAI": {
"apikey_content": lambda s: {
"visible": True,
"value": os.environ.get(s[2]),
"label": s[2],
},
"apikey2_visibility": lambda s: {
"visible": True,
"value": os.environ.get(s[0]),
"label": s[0],
},
"model_visibility": {"visible": True, "value": "gpt-4o"},
"apikey3_visibility": {"visible": False},
},
"Azure": {
"apikey_content": lambda s: {
"visible": True,
"value": os.environ.get(s[0]),
"label": s[0],
},
"apikey2_visibility": lambda s: {
"visible": True,
"value": os.environ.get(s[1]),
"label": s[1],
},
"model_visibility": {"visible": False},
"apikey3_visibility": lambda s: {
"visible": True,
"value": os.environ.get(s[2]),
"label": s[2],
},
},
"Tencent": {
"apikey_content": lambda s: {
"visible": True,
"value": os.environ.get(s[0]),
"label": s[0],
},
"apikey2_visibility": lambda s: {
"visible": True,
"value": os.environ.get(s[1]),
"label": s[1],
},
"model_visibility": {"visible": False},
"apikey3_visibility": {"visible": False},
},
# Map the service names shown in the GUI to the translator class that
# implements each service. The values are the classes themselves (not
# instances), hence ``type[BaseTranslator]`` rather than ``BaseTranslator``.
service_map: dict[str, type[BaseTranslator]] = {
    "Google": GoogleTranslator,
    "DeepL": DeepLTranslator,
    "DeepLX": DeepLXTranslator,
    "Ollama": OllamaTranslator,
    "OpenAI": OpenAITranslator,
    "Azure": AzureTranslator,
    "Tencent": TencentTranslator,
}
lang_map = {
"Chinese": "zh",
Expand All @@ -135,7 +53,7 @@
if os.environ.get("PDF2ZH_DEMO"):
flag_demo = True
service_map = {
"Google": ("google", None, None),
"Google": GoogleTranslator,
}
page_map = {
"First": [0],
Expand All @@ -147,14 +65,10 @@

def verify_recaptcha(response):
    """Validate a reCAPTCHA response token with Google's siteverify endpoint.

    Args:
        response: The reCAPTCHA response token to verify.

    Returns:
        The value of the ``success`` field in the JSON reply, or ``None``
        when the reply has no such field.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    recaptcha_url = "https://www.google.com/recaptcha/api/siteverify"

    # The secret key and the raw token are intentionally not printed:
    # writing credentials to stdout leaks them into logs.
    data = {"secret": server_key, "response": response}
    # Bound the wait so an unresponsive endpoint cannot hang the caller;
    # 10 s matches the timeout used by download_with_limit in this file.
    result = requests.post(recaptcha_url, data=data, timeout=10).json()

    return result.get("success")


Expand All @@ -167,18 +81,8 @@ def pdf_preview(file):


def upload_file(file, service, progress=gr.Progress()):
    """Handle file upload, validation, and initial preview.

    Args:
        file: Path of the uploaded file; may be empty/None when nothing
            has been uploaded.
        service: Selected service name (not used by this function).
        progress: Gradio progress tracker (not used by this function).

    Returns:
        A ``(file, preview_image)`` tuple on success, or ``(None, None)``
        when the path is missing or the preview cannot be generated.
    """
    # Guard clause: nothing to preview without an existing file.
    if not file or not os.path.exists(file):
        return None, None

    try:
        # Convert first page for preview
        preview_image = pdf_preview(file)
    except Exception as e:
        # Best-effort boundary: a broken PDF must not crash the GUI handler.
        print(f"Error converting PDF: {e}")
        return None, None

    return file, preview_image


def download_with_limit(url, save_path, size_limit):
Expand All @@ -187,10 +91,10 @@ def download_with_limit(url, save_path, size_limit):
with requests.get(url, stream=True, timeout=10) as response:
response.raise_for_status()
content = response.headers.get("Content-Disposition")
try:
try: # filename from header
_, params = cgi.parse_header(content)
filename = params["filename"]
except Exception:
except Exception: # filename from url
filename = os.path.basename(url)
with open(save_path / filename, "wb") as file:
for chunk in response.iter_content(chunk_size=chunk_size):
Expand Down Expand Up @@ -508,45 +412,46 @@ def env_var_checker(env_var_name: str) -> str:
return details_wrapper(envs_status)

def on_select_service(service, evt: gr.EventData):
# Build the Gradio component updates for the selected translation service.
#
# The live statements below look up service_config[service] and, for each of
# the four keys ("apikey_content", "apikey2_visibility", "model_visibility",
# "apikey3_visibility"), either call the entry with service_map[service]
# (when the entry is callable) or use it directly, passing the resulting
# dict as keyword arguments to gr.update(). An unknown service raises
# gr.Error("Strange Service"). The return value is a 5-tuple:
# (env_var_checker(...), model_visibility, apikey_content,
#  apikey2_visibility, apikey3_visibility).
#
# NOTE(review): this is a diff view with removed and added lines
# interleaved and indentation stripped. The same logic appears a second
# time below as commented-out code followed by `pass`; presumably only one
# of the two bodies exists in any real revision -- confirm against the
# repository before editing. `evt` is not referenced in the visible code.
if service in service_config:
config = service_config[service]
apikey_content = gr.update(
**(
config["apikey_content"](service_map[service])
if callable(config["apikey_content"])
else config["apikey_content"]
)
)
apikey2_visibility = gr.update(
**(
config["apikey2_visibility"](service_map[service])
if callable(config["apikey2_visibility"])
else config["apikey2_visibility"]
)
)
model_visibility = gr.update(
**(
config["model_visibility"](service_map[service])
if callable(config["model_visibility"])
else config["model_visibility"]
)
)
apikey3_visibility = gr.update(
**(
config["apikey3_visibility"](service_map[service])
if callable(config["apikey3_visibility"])
else config["apikey3_visibility"]
)
)
else:
raise gr.Error("Strange Service")
return (
env_var_checker(service_map[service]),
model_visibility,
apikey_content,
apikey2_visibility,
apikey3_visibility,
)
# if service in service_config:
# config = service_config[service]
# apikey_content = gr.update(
# **(
# config["apikey_content"](service_map[service])
# if callable(config["apikey_content"])
# else config["apikey_content"]
# )
# )
# apikey2_visibility = gr.update(
# **(
# config["apikey2_visibility"](service_map[service])
# if callable(config["apikey2_visibility"])
# else config["apikey2_visibility"]
# )
# )
# model_visibility = gr.update(
# **(
# config["model_visibility"](service_map[service])
# if callable(config["model_visibility"])
# else config["model_visibility"]
# )
# )
# apikey3_visibility = gr.update(
# **(
# config["apikey3_visibility"](service_map[service])
# if callable(config["apikey3_visibility"])
# else config["apikey3_visibility"]
# )
# )
# else:
# raise gr.Error("Strange Service")
# return (
# env_var_checker(service_map[service]),
# model_visibility,
# apikey_content,
# apikey2_visibility,
# apikey3_visibility,
# )
pass

def on_select_filetype(file_type):
return (
Expand Down
Loading

0 comments on commit 6519ce5

Please sign in to comment.