From ebfe88433fcfe93193d40478ee4f6e5ae24d93e0 Mon Sep 17 00:00:00 2001 From: Jan Holthuis Date: Sun, 12 Jan 2025 23:18:00 +0100 Subject: [PATCH] fix(ofx): Use compliant `UNICODE` name for UTF-8 encoding --- src/ofxstatement/ofx.py | 22 +++++++++++++++++++--- src/ofxstatement/tests/test_ofx.py | 4 ++-- src/ofxstatement/tests/test_ofx_invest.py | 2 +- src/ofxstatement/tool.py | 5 +++-- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/ofxstatement/ofx.py b/src/ofxstatement/ofx.py index 53359ab..1448d19 100644 --- a/src/ofxstatement/ofx.py +++ b/src/ofxstatement/ofx.py @@ -1,3 +1,4 @@ +import codecs from typing import Optional, Union from datetime import datetime, date from decimal import Decimal @@ -22,20 +23,35 @@ def __init__(self, statement: Statement) -> None: self.default_float_precision = 2 self.invest_transactions_float_precision = 5 - def toxml(self, pretty: bool = False) -> str: + def toxml(self, pretty: bool = False, encoding: str = "utf-8") -> str: et = self.buildDocument() xmlstring = etree.tostring(et.getroot(), "unicode") if pretty: dom = minidom.parseString(xmlstring) xmlstring = dom.toprettyxml(indent=" ", newl="\r\n") xmlstring = xmlstring.replace('', "").lstrip() + + codec = codecs.lookup(encoding) + if codec.name == "utf-8": + encoding_name = "UNICODE" + charset_name = "UTF-8" + elif codec.name.startswith("cp"): + encoding_name = "USASCII" + charset_name = codec.name[2:] + else: + # This is non-standard, because according to the OFX spec the + # CHARSET should be the codepage number. We handle this gracefully, + # since the only alternative is throwing an error here. + encoding_name = "USASCII" + charset_name = codec.name.upper() + header = ( "OFXHEADER:100\r\n" "DATA:OFXSGML\r\n" "VERSION:102\r\n" "SECURITY:NONE\r\n" - "ENCODING:UTF-8\r\n" - "CHARSET:NONE\r\n" + f"ENCODING:{encoding_name}\r\n" + f"CHARSET:{charset_name}\r\n" "COMPRESSION:NONE\r\n" "OLDFILEUID:NONE\r\n" "NEWFILEUID:NONE\r\n" diff --git a/src/ofxstatement/tests/test_ofx.py b/src/ofxstatement/tests/test_ofx.py index 8be1f7a..e707fac 100644 --- a/src/ofxstatement/tests/test_ofx.py +++ b/src/ofxstatement/tests/test_ofx.py @@ -12,7 +12,7 @@ DATA:OFXSGML VERSION:102 SECURITY:NONE -ENCODING:UTF-8 +ENCODING:UNICODE CHARSET:NONE COMPRESSION:NONE OLDFILEUID:NONE @@ -130,7 +130,7 @@ def test_ofxWriter_pretty(self) -> None: "DATA:OFXSGML", "VERSION:102", "SECURITY:NONE", - "ENCODING:UTF-8", + "ENCODING:UNICODE", "CHARSET:NONE", "COMPRESSION:NONE", "OLDFILEUID:NONE", diff --git a/src/ofxstatement/tests/test_ofx_invest.py b/src/ofxstatement/tests/test_ofx_invest.py index 34013fa..1b8afb6 100644 --- a/src/ofxstatement/tests/test_ofx_invest.py +++ b/src/ofxstatement/tests/test_ofx_invest.py @@ -12,7 +12,7 @@ DATA:OFXSGML VERSION:102 SECURITY:NONE -ENCODING:UTF-8 +ENCODING:UNICODE CHARSET:NONE COMPRESSION:NONE OLDFILEUID:NONE diff --git a/src/ofxstatement/tool.py b/src/ofxstatement/tool.py index 4fdfc7d..f09956d 100644 --- a/src/ofxstatement/tool.py +++ b/src/ofxstatement/tool.py @@ -201,9 +201,10 @@ def convert(args: argparse.Namespace) -> int: log.error("Statement validation error: %s" % (e.message)) return 2 # Validation error - with smart_open(args.output, settings.get("encoding", None)) as out: + encoding = settings.get("encoding", "utf-8") + with smart_open(args.output, encoding) as out: writer = ofx.OfxWriter(statement) - out.write(writer.toxml(pretty=args.pretty)) + out.write(writer.toxml(pretty=args.pretty, encoding=encoding)) n_lines = len(statement.lines) log.info(