From 471ee831e9a7fae289b8fd4db0547077c1f66a41 Mon Sep 17 00:00:00 2001 From: Mauro Vanetti Date: Tue, 23 May 2023 12:16:01 +0200 Subject: [PATCH 1/2] Added support for cp-858 (like cp-850 except it includes the euro sign) --- README.md | 1 + lib/dos.dart | 1 + lib/src/dos/code_page_858.dart | 230 +++++++++++++++++++++++++++++++ test/dos/code_page_858_test.dart | 106 ++++++++++++++ 4 files changed, 338 insertions(+) create mode 100644 lib/src/dos/code_page_858.dart create mode 100644 test/dos/code_page_858_test.dart diff --git a/README.md b/README.md index e32b943..7d30bff 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Supports the following encodings: * Windows-1256 / cp-1256 * DOS Codepage Encodings: * cp-850 + * cp-858 * GBK (compatible with GB-2312) * KOI8 * KOI8-R diff --git a/lib/dos.dart b/lib/dos.dart index e3aa199..ec4b37f 100644 --- a/lib/dos.dart +++ b/lib/dos.dart @@ -8,3 +8,4 @@ library enough_convert_dos; export 'src/dos/code_page_850.dart'; +export 'src/dos/code_page_858.dart'; diff --git a/lib/src/dos/code_page_858.dart b/lib/src/dos/code_page_858.dart new file mode 100644 index 0000000..79b1959 --- /dev/null +++ b/lib/src/dos/code_page_858.dart @@ -0,0 +1,230 @@ +import 'dart:convert' as dart_convert; + +import 'dos.dart'; + +/// Provides a cp858 codec for easy encoding and decoding. +/// +/// https://en.wikipedia.org/wiki/Code_page_850#Code_page_858 +class CodePage858Codec extends dart_convert.Encoding { + /// Creates a new [CodePage858Codec] + /// + /// Set [allowInvalid] to `true` for ignoring invalid data. + /// When invalid data is allowed it will be encoded to ? and decoded to � + const CodePage858Codec({ + this.allowInvalid = false, + }); + + /// Should invalid character codes be ignored? + /// + /// When `false`, an invalid character code + /// will throw [FormatException]. + final bool allowInvalid; + + @override + CodePage858Decoder get decoder => allowInvalid + ? 
+ const CodePage858Decoder(allowInvalid: true) + : const CodePage858Decoder(allowInvalid: false); + + @override + CodePage858Encoder get encoder => allowInvalid + ? const CodePage858Encoder(allowInvalid: true) + : const CodePage858Encoder(allowInvalid: false); + + @override + String get name => 'cp-858'; +} + +/// Decodes cp-858 / DOS-Latin-1 data. +class CodePage858Decoder extends DosCodePageDecoder { + /// Creates a new [CodePage858Decoder] + /// + /// Set [allowInvalid] to `true` for ignoring invalid data. + /// When invalid data is allowed, it will be decoded to � + const CodePage858Decoder({ + bool allowInvalid = false, + }) : super( + _cp858Symbols, + allowInvalid: allowInvalid, + ); +} + +/// Encodes texts into cp-858 / DOS-Latin-1 data +class CodePage858Encoder extends DosCodePageEncoder { + /// Creates a new [CodePage858Encoder] + /// + /// Set [allowInvalid] to `true` for ignoring invalid data. + /// When invalid data is allowed, it will be encoded to ? + const CodePage858Encoder({ + bool allowInvalid = false, + }) : super(_cp858Map, allowInvalid: allowInvalid); +} + +// cSpell:disable +const String _cp858Symbols = +// ignore: lines_longer_than_80_chars + '⌂ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜø£Ø׃áíóúñѪº¿®¬½¼¡«»░▒▓│┤ÁÂÀ©╣║╗╝¢¥┐└┴┬├─┼ãÃ╚╔╩╦╠═╬¤ðÐÊËÈ€ÍÎÏ┘┌█▄¦Ì▀ÓßÔÒõÕµþÞÚÛÙýݯ´\u{00AD}±‗¾¶§÷¸°¨·¹³²■\u{00A0}'; + +const Map<int, int> _cp858Map = { + // start block: + 9786: 1, + 9787: 2, + 9829: 3, + 9830: 4, + 9827: 5, + 9824: 6, + 8226: 7, + 9688: 8, + 9675: 9, + 9689: 10, + 9794: 11, + 9792: 12, + 9834: 13, + 9835: 14, + 9788: 15, + 9658: 16, + 9668: 17, + 8597: 18, + 8252: 19, + // 182: 20, + // 167: 21, + 9644: 22, + 8616: 23, + 8593: 24, + 8595: 25, + 8594: 26, + 8592: 27, + 8735: 28, + 8596: 29, + 9650: 30, + 9660: 31, + // upper area: + 8962: 127, + 199: 128, + 252: 129, + 233: 130, + 226: 131, + 228: 132, + 224: 133, + 229: 134, + 231: 135, + 234: 136, + 235: 137, + 232: 138, + 239: 139, + 238: 140, + 236: 141, + 196: 142, + 197: 143, + 201: 144, + 230: 145, + 198: 146, + 
244: 147, + 246: 148, + 242: 149, + 251: 150, + 249: 151, + 255: 152, + 214: 153, + 220: 154, + 248: 155, + 163: 156, + 216: 157, + 215: 158, + 402: 159, + 225: 160, + 237: 161, + 243: 162, + 250: 163, + 241: 164, + 209: 165, + 170: 166, + 186: 167, + 191: 168, + 174: 169, + 172: 170, + 189: 171, + 188: 172, + 161: 173, + 171: 174, + 187: 175, + 9617: 176, + 9618: 177, + 9619: 178, + 9474: 179, + 9508: 180, + 193: 181, + 194: 182, + 192: 183, + 169: 184, + 9571: 185, + 9553: 186, + 9559: 187, + 9565: 188, + 162: 189, + 165: 190, + 9488: 191, + 9492: 192, + 9524: 193, + 9516: 194, + 9500: 195, + 9472: 196, + 9532: 197, + 227: 198, + 195: 199, + 9562: 200, + 9556: 201, + 9577: 202, + 9574: 203, + 9568: 204, + 9552: 205, + 9580: 206, + 164: 207, + 240: 208, + 208: 209, + 202: 210, + 203: 211, + 200: 212, + 8364: 213, + 205: 214, + 206: 215, + 207: 216, + 9496: 217, + 9484: 218, + 9608: 219, + 9604: 220, + 166: 221, + 204: 222, + 9600: 223, + 211: 224, + 223: 225, + 212: 226, + 210: 227, + 245: 228, + 213: 229, + 181: 230, + 254: 231, + 222: 232, + 218: 233, + 219: 234, + 217: 235, + 253: 236, + 221: 237, + 175: 238, + 180: 239, + 173: 240, + 177: 241, + 8215: 242, + 190: 243, + 182: 244, + 167: 245, + 247: 246, + 184: 247, + 176: 248, + 168: 249, + 183: 250, + 185: 251, + 179: 252, + 178: 253, + 9632: 254, + 160: 255, +}; diff --git a/test/dos/code_page_858_test.dart b/test/dos/code_page_858_test.dart new file mode 100644 index 0000000..a8fb26d --- /dev/null +++ b/test/dos/code_page_858_test.dart @@ -0,0 +1,106 @@ +// ignore_for_file: lines_longer_than_80_chars +// cSpell:disable + +import 'dart:convert' as dart_convert; + +import 'package:enough_convert/enough_convert.dart'; +// import 'package:enough_convert/src/base.dart'; +import 'package:test/test.dart'; + +void main() { + group('Euro sign', () { + // This is the key test for this code page, whose only difference from 850 + // is the support for the euro sign (that replaces the "ı" character). 
+ test('encode euro sign', () { + final bytes = const CodePage858Encoder().convert('€'); + expect(bytes, [0xD5]); + expect(const CodePage858Decoder().convert(bytes), '€'); + }); + }); + + group('Codec tests', () { + test('name', () { + expect(const CodePage858Codec().name, 'cp-858'); + // BaseEncoder.createEncodingMap(CodePage858Decoder().startBlock!, 0); + // BaseEncoder.createEncodingMap(CodePage858Decoder().symbols, CodePage858Decoder().startIndex); + }); + test('Decoder/encoder classes', () { + expect(const CodePage858Codec().encoder, isA<CodePage858Encoder>()); + expect(const CodePage858Codec().decoder, isA<CodePage858Decoder>()); + }); + }); + + group('Decoder tests', () { + test('Decode ascii', () { + final bytes = dart_convert.ascii.encode('hello world'); + expect(const CodePage858Decoder().convert(bytes), 'hello world'); + }); + + test('Decode cp-858', () { + expect( + const CodePage858Decoder().convert([0x0C, 0x0E, 0x7F, 0x9D]), '♀♫⌂Ø'); + final bytes = const CodePage858Encoder() + .convert('hello world motörhead ruleß ok ô'); + expect(const CodePage858Decoder().convert(bytes), + 'hello world motörhead ruleß ok ô'); + }); + + test('Decode cp-858 with invalid value when invalid input is allowed', () { + expect( + const CodePage858Decoder(allowInvalid: true) + .convert([0x0C, 0x0E, 0x7F, 0x9D, 0xFF1]), + '♀♫⌂Ø�'); + }); + + test('Decode cp-858 with invalid value when invalid input is not allowed', + () { + expect( + () => const CodePage858Decoder() + .convert([0x0C, 0x0E, 0x7F, 0x9D, 0xFF1]), + throwsA(isA<FormatException>())); + }); + }); + + group('Encoder tests', () { + test('encode ascii', () { + final bytes = const CodePage858Encoder().convert('hello world'); + expect(bytes, dart_convert.latin1.encode('hello world')); + }); + + test('encode 858', () { + var bytes = const CodePage858Encoder().convert('♫⌂Ø'); + expect(bytes, [0x0E, 0x7F, 0x9D]); + bytes = + const CodePage858Encoder().convert('hello world motörhead ruleß ok'); + expect(const CodePage858Decoder().convert(bytes), + 'hello world motörhead ruleß 
+ ok'); + + bytes = const CodePage858Encoder() + .convert('‼¶§▬abcABC┴┬├─┼ãÃ╚╔╩╦╠═╬¤ðÐÊËÈ€ÍÎÏ┘┌█▄'); + expect(bytes.any((element) => element > 0xFF), false); + }); + + test('encode more cp-858 ', () { + var bytes = const CodePage858Encoder().convert( + '⌂ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜø£Ø׃áíóúñѪº¿®¬½¼¡«»░▒▓│┤ÁÂÀ©╣║╗╝¢¥┐└┴┬├─┼ãÃ╚╔╩╦╠═╬¤ðÐÊËÈ€ÍÎÏ┘┌█▄¦Ì▀ÓßÔÒõÕµþÞÚÛÙýݯ´\u{00AD}±‗¾¶§÷¸°¨·¹³²■\u{00A0}'); + var expected = List.generate(255 - 126, (index) => index + 127); + expect(bytes, expected); + + bytes = const CodePage858Encoder().convert('íóúñѪº¿®¬½¼¡«'); + expected = List.generate(0xAE - 0xA0, (index) => index + 0xA1); + expect(bytes, expected); + }); + + test('encode cp-858 with invalid value when invalid input is allowed', () { + final bytes = + const CodePage858Encoder(allowInvalid: true).convert('ÄÖü�'); + expect(const CodePage858Decoder().convert(bytes), 'ÄÖü?'); + }); + + test('encode cp-858 with invalid value when invalid input is not allowed', + () { + expect(() => const CodePage858Encoder().convert('ÄÖü�'), + throwsA(isA<FormatException>())); + }); + }); +} From 7d33fc49a64b6651b032c5c12421cb662e680b87 Mon Sep 17 00:00:00 2001 From: Mauro Vanetti Date: Tue, 23 May 2023 12:27:35 +0200 Subject: [PATCH 2/2] v1.6.0+cp858 --- pubspec.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pubspec.yaml b/pubspec.yaml index cd21f84..5e6bde7 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,6 +1,6 @@ name: enough_convert description: Support for character encodings / charsets / codecs missing from `dart:convert` - ISO 8859 / Latin, Windows, DOS, GBK, Big5, and KOI8 R/U. -version: 1.6.0 +version: 1.6.0+cp858 homepage: https://github.com/Enough-Software/enough_convert environment: