diff --git a/src/REMSGUtil.py b/src/REMSGUtil.py index e63900b..3706a81 100644 --- a/src/REMSGUtil.py +++ b/src/REMSGUtil.py @@ -234,19 +234,25 @@ def importCSV(msgObj: MSG, filename: str, version: int = None, langCount: int = return msg -def exportTXT(msg: MSG, filename: str, lang: int, encode="utf-8"): +def exportTXT(msg: MSG, filename: str, lang: int, encode=None): """write txt file from MSG object with specified language""" - with io.open(filename, "w", encoding=encode) as txtf: + with io.open(filename, "w", encoding=encode if encode is not None else 'utf-8') as txtf: txtf.writelines([''+entry.langs[lang].replace('\r\n','')+'\n' for entry in msg.entrys]) -def importTXT(msgObj: MSG, filename: str, lang: int) -> MSG: +def importTXT(msgObj: MSG, filename: str, lang: int, encode=None) -> MSG: """read txt file, modify the provided msg object, and return the new MSG object""" + if encode is None: + encode = getEncoding(filename) + elif 'utf' in encode and 'sig' not in encode: + testEncode = getEncoding(filename) + if testEncode.endswith('sig'): + encode = testEncode msg = copy.deepcopy(msgObj) lines = None - with io.open(filename, mode="r", encoding=getEncoding(filename)) as txtf: + with io.open(filename, mode="r", encoding=encode) as txtf: lines = list([s.rstrip('\n').rstrip('\r').removeprefix("").replace('','\r\n') for s in txtf.readlines() if s.startswith("")]) assert len(lines) == len(msg.entrys), "Invalid number of entry" diff --git a/src/main.py b/src/main.py index fbe7262..325c4ec 100644 --- a/src/main.py +++ b/src/main.py @@ -92,7 +92,7 @@ def fillList(path: str, filetype = 'msg'): else: return [] -def worker(item, mode = "csv", modFile: str = None, lang : int = REMSGUtil.SHORT_LANG_LU["ja"], attrSum=""): +def worker(item, mode = "csv", modFile: str = None, lang : int = REMSGUtil.SHORT_LANG_LU["ja"], **kwargs): try: filenameFull = os.path.abspath(item) print("processing:"+filenameFull) @@ -108,9 +108,9 @@ def worker(item, mode = "csv", modFile: str = None, lang : int = REMSGUtil.SHORT elif mode == "txt": if modFile is None: - REMSGUtil.exportTXT(msg, filenameFull+'.'+mode, lang) + REMSGUtil.exportTXT(msg, filenameFull+'.'+mode, lang, encode=kwargs["txtformat"]) else: - REMSGUtil.exportMSG(msg=REMSGUtil.importTXT(msg, modFile, lang), filename=filenameFull+'.new') + REMSGUtil.exportMSG(msg=REMSGUtil.importTXT(msg, modFile, lang, encode=kwargs["txtformat"]), filename=filenameFull+'.new') elif mode == "json": if modFile is None: @@ -137,6 +137,7 @@ def main(): parser.add_argument('-m', '--mode', type=str, choices=['csv','txt','json'], default='csv', help='choose output file format.\n txt = msg tool style txt.\n csv = all lang in one csv with rich info.\n json = all lang in one json with rich info in mhrice format') parser.add_argument('-e', '--edit', type=str, help='input (csv/txt/json) file to edit the content.\n if input as folder, the filename and number of files\n should be same as original .msg file\n (with corresponding (.txt/.csv/.json) extension)') parser.add_argument('-l', '--lang', type=str, default='ja', choices=REMSGUtil.SHORT_LANG_LU.keys(), help='input the lang you want to export for txt mode (default ja)\n') + parser.add_argument('-f', '--txtformat', type=str, default=None, choices=['utf-8', 'utf-8-sig'], help="force txt read/write format to be 'utf-8' or 'utf-8-sig'(BOM).\n") parser.add_argument('args', nargs=argparse.REMAINDER) args = parser.parse_args() @@ -218,7 +219,7 @@ def main(): sys.exit(1) executor = concurrent.futures.ProcessPoolExecutor(args.multiprocess) - futures = [executor.submit(worker, file, mode = args.mode, modFile = edit, lang = REMSGUtil.SHORT_LANG_LU[args.lang]) for file, edit in zip(filenameList, editList)] + futures = [executor.submit(worker, file, mode = args.mode, modFile = edit, lang = REMSGUtil.SHORT_LANG_LU[args.lang], txtformat=args.txtformat) for file, edit in zip(filenameList, editList)] concurrent.futures.wait(futures) if __name__ == "__main__":