diff --git a/allow_setting.json b/allow_setting.json deleted file mode 100644 index aa0abcf..0000000 --- a/allow_setting.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "engine": [ - "opencc", - "zhconvert" - ], - "opencc": { - "converter": [ - "s2t", - "t2s", - "s2tw", - "tw2s" - ] - }, - "zhconvert": { - "converter": [ - "Simplified", - "Traditional", - "China", - "Taiwan", - "WikiSimplified", - "WikiTraditional" - ] - }, - "format": [ - "Straight", - "Horizontal" - ], - "content_opt": { - "zh-TW": [ - "s2t", - "s2tw", - "Traditional", - "Taiwan", - "WikiTraditional" - ], - "zh-CN": [ - "t2s", - "tw2s", - "Simplified", - "China", - "WikiSimplified" - ] - } -} \ No newline at end of file diff --git a/app2.py b/app2.py index 8aa8c6f..f5e0a34 100644 --- a/app2.py +++ b/app2.py @@ -3,16 +3,18 @@ import mimetypes import os import re +import shutil import sys +import time import zipfile -import chardet -import logging +from configparser import ConfigParser from modules.console import Console -from modules.utils.error import FileTypeError, FileUnzipError, ConfigError from modules.logger import Logger from modules.opencc import OpenCC -from modules.utils.tools import get_key, resource_path +from modules.utils.error import (ConfigError, FileTypeError, FileUnzipError, + ZhConvertError) +from modules.utils.tools import encoding, get_key, resource_path from modules.zhconvert import ZhConvert @@ -26,14 +28,15 @@ def __init__(self): Objects: logger -- log記錄檔物件 workpath -- 本程式所在的絕對路徑 - config -- 讀取本程式路徑底下的 config.json 設定檔內容 + cfg -- 讀取本程式路徑底下的 config.ini 設定檔內容 convert_file_list -- 執行 unzip 方法後取得 EPub 中需要轉換的檔案之絕對路徑清單(list) new_filename -- 轉換後的 EPub 檔案的檔案名稱 """ - self.logger = Logger(name='EPUB') self.workpath = os.path.abspath( os.path.join(sys.argv[0], os.path.pardir)) - self.config = self._read_config(f'{self.workpath}/config.json') + self.logger = Logger( + name='EPUB', workpath=self.workpath) + self.cfg = self._read_config(f'{self.workpath}/config.ini') self.convert_file_list = None self.file_path = None @@ -45,21 +48,23 @@ def _read_config(self, config): """ if os.path.exists(config): self.logger.info('_read_config', 'read config') - with open(config, 'r', encoding='utf-8') as r_c: - config = json.loads(r_c.read()) + cfg = ConfigParser() + cfg_encoding = encoding(config)['encoding'] + self.logger.info('_read_config encoding',encoding(config)['encoding']) + cfg.read(config, encoding=cfg_encoding) self.logger.info( - '_read_config', f"Aleady read config\nengine: {config['engine']}\nconverter: {config['converter']}\nformat: {config['format']}") - return config + '_read_config', f"already read config\nengine: {cfg['setting']['engine']}\nconverter: {cfg['setting']['converter']}\nformat: {cfg['setting']['format']}") + return cfg else: - print('error') + self.logger.info(f'_read_config', f'can\'t find "config.ini", please check config file.') + + """ def _read_allow_setting(self, config): + '''讀取允許設定 - def _read_allow_setting(self, config): - """讀取允許設定 - Arguments: config {str} -- allow_setting.json path - """ - print(resource_path('allow_setting.json')) + ''' + print(resource_path('allow_setting.json')) """ @property def _zip(self): @@ -103,6 +108,7 @@ def convert(self, epub_file_path): """ try: self.file_path = epub_file_path + self.logger.info('convert', f'file path: {self.file_path}') self._check(epub_file_path) self._unzip(epub_file_path) if self.convert_file_list: @@ -111,10 +117,10 @@ def convert(self, epub_file_path): self._convert_content(self.convert_file_list) self._rename(self.convert_file_list) self._zip - # self._clean + self._clean + self.logger.info('convert', f'success convert {os.path.basename(epub_file_path)}') except Exception as e: self.logger.error('convert', f'{str(e)}') - os.system('pause') def _rename(self, convert_file_list): """重新命名已轉換的檔案 @@ -132,7 +138,7 @@ def _filename(self): "s2t": ["s2t", "s2tw", "Traditional", "Taiwan", "WikiTraditional"], "t2s": ["t2s", "tw2s", "Simplified", "China", "WikiSimplified"] } - converter = get_key(converter_dict, self.config['converter']) + converter = get_key(converter_dict, self.cfg['setting']['converter']) openCC = OpenCC(converter) new_filename = openCC.convert(os.path.basename(self.file_path)) return os.path.join(os.path.dirname(self.file_path), new_filename) @@ -159,42 +165,65 @@ def _convert_content(self, convert_file_list): "format": ["Straight", "Horizontal"] } # 檢查設定檔是否有無錯誤 - if self.config['engine'] not in setting['engine']: - raise ConfigError('Engine is not a right engine in "config.json"') - if self.config['converter'] not in setting['converter'][self.config['engine']]: + if self.cfg['setting']['engine'] not in setting['engine']: + raise ConfigError('Engine is not a right engine in "config.ini"') + if self.cfg['setting']['converter'] not in setting['converter'][self.cfg['setting']['engine']]: raise ConfigError( - 'Converter is not a right converter in "config.json"') - if self.config['format'] not in setting['format']: - raise ConfigError('Format is not a right format in "config.json"') + 'Converter is not a right converter in "config.ini"') + if self.cfg['setting']['format'] not in setting['format']: + raise ConfigError('Format is not a right format in "config.ini"') # 判斷轉換引擎並轉換 - if self.config['engine'].lower() == 'opencc': + if self.cfg['setting']['engine'].lower() == 'opencc': self.logger.debug('convert_text', 'engine: opencc') for f in convert_file_list: self.logger.debug( 'convert_text', f'now convert "{os.path.basename(f)}"') self._content_opt_lang(f) - self._opencc(self.config['converter'], f) - if self.config['engine'].lower() == 'zhconvert': + self._opencc(self.cfg['setting']['converter'], f) + if self.cfg['setting']['engine'].lower() == 'zhconvert': self.logger.debug('convert_text', 'engine: zhconvert 繁化姬') for f in convert_file_list: + self.logger.debug( + 'convert_text', f'now convert "{os.path.basename(f)}"') self._content_opt_lang(f) + self._zhconvert(self.cfg['setting']['converter'], f) def _opencc(self, converter, file): - """opencc 轉換作業 + """opencc Arguments: - converter {str} -- config.json 中 converter 設定,轉換模式 + converter {str} -- config.ini 中 converter 設定,轉換模式 file {str} -- 欲進行文字轉換的內文文檔的絕對路徑 """ openCC = OpenCC(converter) - f_r = open(file, 'r', encoding='utf-8').readlines() + f_encoding = encoding(file)['encoding'] + start_time = time.time() + f_r = open(file, 'r', encoding=f_encoding).readlines() with open(file + '.new', 'w', encoding='utf-8') as f_w: for line in f_r: converted = openCC.convert(line) f_w.write(converted) + end_time = time.time() + self.logger.info('_opencc', f'convert file: {os.path.basename(file)} cost {"{:.2f}".format(end_time-start_time)}s') - def _zhconvert(self, converter): - """ """ + def _zhconvert(self, converter, file): + """zhconvert 繁化姬 + + Arguments: + converter {str} -- config.ini 中 converter 設定,轉換模式 + file {str} -- 欲進行文字轉換的內文文檔的絕對路徑 + """ + zhconvert = ZhConvert() + f_encoding = encoding(file)['encoding'] + start_time = time.time() + with open(file, 'r', encoding=f_encoding) as f_r: + zhconvert.convert(text=f_r.read(), converter=converter) + with open(file + '.new', 'w', encoding='utf-8') as f_w: + if zhconvert.text is None: + raise ZhConvertError() + f_w.write(zhconvert.text) + end_time = time.time() + self.logger.info('_zhconvert', f'convert file: {os.path.basename(file)} cost {"{:.2f}".format(end_time-start_time)}s') def _content_opt_lang(self, content_file_path): """修改 content.opf 中語言標籤的值 @@ -210,21 +239,51 @@ def _content_opt_lang(self, content_file_path): regex = re.compile( r"[\S]*", re.IGNORECASE) fileline = open(content_file_path, encoding='utf-8').read() - if self.config['converter'] in converter["zh-TW"]: + if self.cfg['setting']['converter'] in converter["zh-TW"]: self.logger.info('_content_lang', 'convert language to zh-TW') modify = re.sub( regex, f'zh-TW', fileline) - if self.config['converter'] in converter["zh-CN"]: + if self.cfg['setting']['converter'] in converter["zh-CN"]: self.logger.info('_content_lang', 'convert language to zh-CN') modify = re.sub( regex, f'zh-CN', fileline) open(content_file_path, 'w', encoding='utf-8').write(modify) - def _format(self): - """ """ + # def _format(self, file_path): + # """ """ + # modify_files = {} + # opf_tmp = [] + # css_tmp = [] + # content_tmp = [] + # for root, _dirs, files in os.walk(f'{file_path}_files/'): + # for filename in files: + # if filename.endswith('opf'): + # opf_tmp.append(filename) + # if filename.endswith('css'): + # css_tmp.append(filename) + # if filename.endswith(('xhtml', 'html', 'htm')): + # content_tmp.append(filename) + # modify_files['opf'] = opf_tmp + # modify_files['css'] = css_tmp + # modify_files['content'] = content_tmp + # #橫式 + # if self.cfg['setting']['format'].lower() == 'horizontal': + # self.logger.info('_format', 'set content to horizontal') + # if not any(modify_files['css']): + # print('css file not found') + # #直式 + # if self.cfg['setting']['format'].lower() == 'straight': + # self.logger.info('_format', 'set content to straight') + # print('直式') + @property def _clean(self): - """ """ + """ 清除解壓縮後的檔案 """ + if os.path.isdir( f'{self.file_path}_files'): + self.logger.info('_clean', f'delete tmp files: {self.file_path}_files') + shutil.rmtree(f'{self.file_path}_files') + else: + self.logger.error('_clean', f'path: {self.file_path}_files not found.') def _check(self, file_path): """檢查檔案 MIME 格式 @@ -242,9 +301,8 @@ def _check(self, file_path): if __name__ == "__main__": - #epub = EPubConv() - # epub.convert('H:/VSCode/Python/epubconv/1.epub') - """ zh = ZhConvert() - zh.convert() """ - #epub._read_allow_setting('allow_setting.json') + EPubConvert = EPubConv() + for epub in sys.argv[1:]: + EPubConvert.convert(epub) + os.system("pause") pass diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..340b846 --- /dev/null +++ b/config.ini @@ -0,0 +1,4 @@ +[setting] +engine=zhconvert +converter=Traditional +format=Horizontal \ No newline at end of file diff --git a/config.json b/config.json deleted file mode 100644 index 8fc91ad..0000000 --- a/config.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "//_engine_comment": { - "engine": { - "//": "轉換時所使用的方法,分為 OpenCC(開放中文轉換) 與 zhconvert(繁化姬)", - "opencc": "使用OpenCC進行轉換,不需要網路", - "zhconvert": "使用繁化姬API進行轉換,網路為必須" - } - }, - "//_converter_comment": { - "opencc": { - "//": "OpenCC 轉換時使用的模式 ※有區分大小寫", - "s2t": "簡轉繁", - "t2s": "繁轉簡", - "s2tw": "簡體字轉台灣用字", - "tw2s": "台灣用字轉簡體字" - }, - "zhconvert": { - "//": "繁化姬轉換時使用的模式 ※有區分大小寫", - "Simplified": "簡體化", - "Traditional": "繁體化", - "China": "中國化", - "Taiwan": "台灣化", - "WikiSimplified": "維基簡體化", - "WikiTraditional": "維基繁體化" - } - }, - "//_format_comment": { - "//": "內文格式轉換", - "Straight": "文字格式轉為直式", - "Horizontal": "文字格式轉為橫式" - }, - "engine": "opencc", - "converter": "s2t", - "format": "Horizontal" -} \ No newline at end of file diff --git a/file_version_info.txt b/file_version_info.txt new file mode 100644 index 0000000..746f4a0 --- /dev/null +++ b/file_version_info.txt @@ -0,0 +1,43 @@ +# UTF-8 +# +# For more details about fixed file info 'ffi' see: +# http://msdn.microsoft.com/en-us/library/ms646997.aspx +VSVersionInfo( + ffi=FixedFileInfo( + # filevers and prodvers should be always a tuple with four items: (1, 2, 3, 4) + # Set not needed items to zero 0. + filevers=(2, 0, 0, 0), + prodvers=(2, 0, 0, 0), + # Contains a bitmask that specifies the valid bits 'flags'r + mask=0x3f, + # Contains a bitmask that specifies the Boolean attributes of the file. + flags=0x0, + # The operating system for which this file was designed. + # 0x4 - NT and there is no need to change it. + OS=0x40004, + # The general type of file. + # 0x1 - the file is an application. + fileType=0x1, + # The function of the file. + # 0x0 - the function is not defined for this fileType + subtype=0x0, + # Creation date and time stamp. + date=(0, 0) + ), + kids=[ + StringFileInfo( + [ + StringTable( + u'040904B0', + [StringStruct(u'CompanyName', u''), + StringStruct(u'FileDescription', u'EpubConv'), + StringStruct(u'FileVersion', u'2.0.0'), + StringStruct(u'InternalName', u'Epubconv'), + StringStruct(u'LegalCopyright', u''), + StringStruct(u'OriginalFilename', u'Epubconv.Exe'), + StringStruct(u'ProductName', u'Epubconv'), + StringStruct(u'ProductVersion', u'2.0.0')]) + ]), + VarFileInfo([VarStruct(u'Translation', [1033, 1200])]) + ] +) diff --git a/modules/logger.py b/modules/logger.py index 999804d..b91c3a4 100644 --- a/modules/logger.py +++ b/modules/logger.py @@ -1,7 +1,9 @@ # logger 記錄檔模組 import logging -import time import logging.config +import sys +import time +import os class Logger(object): @@ -20,15 +22,25 @@ class Logger(object): # logging.basicConfig(format='%(asctime)s %(levelname)s :\n%(message)s', # level=level, datefmt='%Y-%m-%d %H:%M:%S', filename='DBAPI.log', filemode='w') - def __init__(self, name='logger'): + def __init__(self, name='logger', config=None, workpath=None): """Logger Keyword Arguments: name {str} -- [name of logging] (default: {'logger'}) """ - logging.config.fileConfig( - fname='logging.conf', disable_existing_loggers=False) + logging.basicConfig(level=logging.INFO) + #logging.config.fileConfig(fname=config, disable_existing_loggers=False) + formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s : %(message)s') + if not workpath: + workpath = os.path.abspath(os.path.join(sys.argv[0], os.path.pardir)) + + log = logging.FileHandler( + f'{workpath}/epubconv.log', 'w', encoding='utf-8') + log.setFormatter(formatter) + log.setLevel(logging.DEBUG) + self.logger = logging.getLogger(name) + self.logger.addHandler(log) def debug(self, function, msg): """ logging debug level diff --git a/modules/utils/error.py b/modules/utils/error.py index b266654..1027f05 100644 --- a/modules/utils/error.py +++ b/modules/utils/error.py @@ -9,5 +9,23 @@ class FileUnzipError(Exception): class ConfigError(Exception): pass + class RequestError(Exception): - pass \ No newline at end of file + pass + + +class ZhconvertKeyNotFound(Exception): + def __init__(self, error_keys): + self.error_keys = error_keys + + def __str__(self): + return f'Parameter key not found. {self.error_keys} not allow key.' + + +class ZhConvertMissNecessarykey(Exception): + def __str__(self): + return f'Miss necessary key "text" or "converter".' + +class ZhConvertError(Exception): + def __str__(self): + return f'ZhConvert response is None, please check text or converter.' diff --git a/modules/utils/tools.py b/modules/utils/tools.py index fb1ce85..4ec1a7e 100644 --- a/modules/utils/tools.py +++ b/modules/utils/tools.py @@ -1,6 +1,8 @@ import os import sys +import cchardet as chardet + def get_key(d: dict, value: str): """使用 value 搜尋 dict key @@ -20,3 +22,11 @@ def resource_path(relative_path): base_path = getattr(sys, '_MEIPASS', os.path.dirname( os.path.abspath(__file__))) return os.path.join(base_path, relative_path) + + +def encoding(file_path): + """ Return file encoding """ + with open(file_path, "rb") as f: + msg = f.read() + result = chardet.detect(msg) + return result \ No newline at end of file diff --git a/modules/zhconvert.py b/modules/zhconvert.py index 29b66e0..983497f 100644 --- a/modules/zhconvert.py +++ b/modules/zhconvert.py @@ -1,22 +1,25 @@ # 繁化姬模組 +import json + import requests -from modules.utils.error import RequestError + +from modules.utils.error import (RequestError, ZhconvertKeyNotFound, + ZhConvertMissNecessarykey) class ZhConvert: """ 繁化姬模組 """ - def __init__(self, converter): + def __init__(self): """ """ - self.API = 'https://api.zhconvert.org' - self.converter = converter + self.api = f'https://api.zhconvert.org' def __request(self, endpoint: str, playload): - with requests.get(f'{self.API}{endpoint}', data=playload) as req: + with requests.get(f'{self.api}{endpoint}', data=playload) as req: if req.status_code != 200: raise RequestError('Request error.') req.encoding = 'utf-8' - return req.text + return json.loads(req.text) def __check(self): allow_converter = ["Simplified", @@ -26,10 +29,69 @@ def __check(self): "WikiSimplified", "WikiTraditional"] - def convert(self, text): - playload = { - 'text': text, - 'converter': self.converter - } - test = self.__request('/convert', playload) - print(test) + def convert(self, **args): + """繁化姬轉換 + + API doc : https://docs.zhconvert.org/api/convert/ + + Arguments: + text : 欲轉換的文字 + converter : 所要使用的轉換器。有 Simplified (簡體化)、 Traditional (繁體化)、 + China (中國化)、 Taiwan (台灣化)、WikiSimplified (維基簡體化)、 + WikiTraditional (維基繁體化)。 + ignoreTextStyles : 由那些不希望被繁化姬處理的 "樣式" 以逗號分隔所組成的字串。 + 通常用於保護特效字幕不被轉換, + 例如字幕組的特效字幕若是以 OPJP 與 OPCN 作為樣式名。 + 可以設定 "OPJP,OPCN" 來作保護。 + jpTextStyles : 告訴繁化姬哪些樣式要當作日文處理(預設為伺服器端自動猜測)。 + 若要自行設定,則必須另外再加入 *noAutoJpTextStyles 這個樣式。 + 所有樣式以逗號分隔組成字串, + 例如: "OPJP,EDJP,*noAutoJpTextStyles" 表示不讓伺服器自動猜測, + 並指定 OPJP 與 EDJP 為日文樣式。 + jpStyleConversionStrategy : 對於日文樣式該如何處理。 + "none" 表示 無(當成中文處理) 、 "protect" 表示 保護 、 + "protectOnlySameOrigin" 表示 僅保護原文與日文相同的字 、 + "fix" 表示 修正 。 + jpTextConversionStrategy : 對於繁化姬自己發現的日文區域該如何處理。 + "none" 表示 無(當成中文處理) 、 "protect" 表示 保護 、 + "protectOnlySameOrigin" 表示 僅保護原文與日文相同的字 、 + "fix" 表示 修正 。 + modules : 強制設定模組啟用/停用 。 -1 / 0 / 1 分別表示 自動 / 停用 / 啟用 。 + 字串使用 JSON 格式編碼。使用 * 可以先設定所有模組的狀態。 + 例如:{"*":0,"Naruto":1,"Typo":1} 表示停用所有模組, + 但啟用 火影忍者 與 錯別字修正 模組。 + userPostReplace : 轉換後再進行的額外取代。 + 格式為 "搜尋1=取代1\\n搜尋2=取代2\\n..." 。 + 搜尋1 會在轉換後再被取代為 取代1 。 + userPreReplace : 轉換前先進行的額外取代。 + 格式為 "搜尋1=取代1\\n搜尋2=取代2\\n..." 。 + 搜尋1 會在轉換前先被取代為 取代1 。 + userProtectReplace : 保護字詞不被繁化姬修改。 + 格式為 "保護1\\n保護2\\n..." 。 + 保護1 、 保護2 等字詞將不會被繁化姬修改。 + """ + allow_keys = [ + 'text', + 'converter', + 'ignoreTextStyles', + 'jpTextStyles', + 'jpStyleConversionStrategy', + 'jpTextConversionStrategy', + 'modules', + 'userPostReplace', + 'userPreReplace', + 'userProtectReplace', + ] + error_key = [key for key in args.keys() if key not in allow_keys] + if error_key: + raise ZhconvertKeyNotFound(', '.join(error_key)) + if args.get('text', None) is None or args.get('converter', None) is None: + raise ZhConvertMissNecessarykey() + self.convert_obj = self.__request('/convert', args) + return self.convert_obj + + @property + def text(self): + if self.convert_obj['code'] != 0: + return None + return self.convert_obj['data']['text'] diff --git a/requirements.txt b/requirements.txt index 2f1dce4..f485b03 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/version.txt b/version.txt deleted file mode 100644 index b605a8a..0000000 --- a/version.txt +++ /dev/null @@ -1,40 +0,0 @@ -VSVersionInfo( -ffi=FixedFileInfo( -# filevers and prodvers should be always a tuple with four items: (1, 2, 3, 4) -# Set not needed items to zero 0. -filevers=(1,0,5,2), -prodvers=(1,0,5,2), -# Contains a bitmask that specifies the valid bits 'flags' -mask=0x3f, -# Contains a bitmask that specifies the Boolean attributes of the file. -flags=0x0, -# The operating system for which this file was designed. -# 0x4 - NT and there is no need to change it. -OS=0x4, -# The general type of file. -# 0x1 - the file is an application. -fileType=0x1, -# The function of the file. -# 0x0 - the function is not defined for this fileType -subtype=0x0, -# Creation date and time stamp. -date=(0, 0) -), -kids=[ -StringFileInfo( -[ -StringTable( -u'040904b0', -[StringStruct(u'CompanyName', u''), -StringStruct(u'ProductName', u'Epubconv'), -StringStruct(u'ProductVersion', u'1.0.5.2'), -StringStruct(u'InternalName', u'Epubconv'), -StringStruct(u'OriginalFilename', u'epubconv.exe'), -StringStruct(u'FileVersion', u'1.0.5.2'), -StringStruct(u'FileDescription', u'Epubconv'), -StringStruct(u'LegalCopyright', u'ThanatosDi'), -StringStruct(u'LegalTrademarks', u''),]) -]), -VarFileInfo([VarStruct(u'Translation', [1033, 1200])]) -] -) \ No newline at end of file