Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for cp-858 (like cp-850 except it includes the euro sign) #15

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Supports the following encodings:
* Windows-1256 / cp-1256
* DOS Codepage Encodings:
* cp-850
* cp-858
* GBK (compatible with GB-2312)
* KOI8
* KOI8-R
Expand Down
1 change: 1 addition & 0 deletions lib/dos.dart
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
library enough_convert_dos;

export 'src/dos/code_page_850.dart';
export 'src/dos/code_page_858.dart';
230 changes: 230 additions & 0 deletions lib/src/dos/code_page_858.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
import 'dart:convert' as dart_convert;

import 'dos.dart';

/// A [dart_convert.Encoding] for the cp-858 DOS code page.
///
/// Bundles a [CodePage858Encoder] and a [CodePage858Decoder] so that text can
/// be converted in both directions through a single object.
///
/// https://en.wikipedia.org/wiki/Code_page_850#Code_page_858
class CodePage858Codec extends dart_convert.Encoding {
  /// Creates a new [CodePage858Codec].
  ///
  /// Pass `true` for [allowInvalid] to ignore invalid data instead of
  /// throwing. Invalid data is then encoded to ? and decoded to �
  const CodePage858Codec({
    this.allowInvalid = false,
  });

  /// Whether invalid character codes are ignored.
  ///
  /// When `false`, an invalid character code
  /// will throw [FormatException].
  final bool allowInvalid;

  @override
  String get name => 'cp-858';

  @override
  CodePage858Decoder get decoder {
    // Both branches return compile-time constants, so repeated calls hand out
    // the same canonical instance.
    if (allowInvalid) {
      return const CodePage858Decoder(allowInvalid: true);
    }
    return const CodePage858Decoder(allowInvalid: false);
  }

  @override
  CodePage858Encoder get encoder {
    if (allowInvalid) {
      return const CodePage858Encoder(allowInvalid: true);
    }
    return const CodePage858Encoder(allowInvalid: false);
  }
}

/// Decodes cp-858 data (cp-850 with the euro sign) into text.
///
/// The decoding itself is inherited from [DosCodePageDecoder]; this class only
/// supplies the cp-858 symbol table.
class CodePage858Decoder extends DosCodePageDecoder {
/// Creates a new [CodePage858Decoder]
///
/// Set [allowInvalid] to `true` for ignoring invalid data.
/// When invalid data is allowed, it will be decoded to �
const CodePage858Decoder({
bool allowInvalid = false,
}) : super(
_cp858Symbols,
allowInvalid: allowInvalid,
);
}

/// Encodes text into cp-858 data (cp-850 / DOS-Latin-1 with the euro sign).
///
/// The encoding itself is inherited from [DosCodePageEncoder]; this class only
/// supplies the cp-858 lookup map.
class CodePage858Encoder extends DosCodePageEncoder {
/// Creates a new [CodePage858Encoder]
///
/// Set [allowInvalid] to `true` for ignoring invalid data.
/// When invalid data is allowed, it will be encoded to ?
const CodePage858Encoder({
bool allowInvalid = false,
}) : super(_cp858Map, allowInvalid: allowInvalid);
}

// cSpell:disable
// Symbol table handed to the decoder: the characters for byte values
// 0x7F through 0xFF in ascending byte order, mirroring the "upper area" of
// the encoding map in this file. The euro sign (byte 0xD5) is the entry
// that sets cp-858 apart from cp-850.
const String _cp858Symbols =
// ignore: lines_longer_than_80_chars
'⌂ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜø£Ø׃áíóúñѪº¿®¬½¼¡«»░▒▓│┤ÁÂÀ©╣║╗╝¢¥┐└┴┬├─┼ãÃ╚╔╩╦╠═╬¤ðÐÊËÈ€ÍÎÏ┘┌█▄¦Ì▀ÓßÔÒõÕµþÞÚÛÙýݯ´\u{00AD}±‗¾¶§÷¸°¨·¹³²■\u{00A0}';

// Lookup table handed to the encoder: each key is a Unicode code point and
// each value is the cp-858 byte it is encoded to.
const Map<int, int> _cp858Map = {
// start block: graphical symbols assigned to the byte values 1 through 31.
9786: 1,
9787: 2,
9829: 3,
9830: 4,
9827: 5,
9824: 6,
8226: 7,
9688: 8,
9675: 9,
9689: 10,
9794: 11,
9792: 12,
9834: 13,
9835: 14,
9788: 15,
9658: 16,
9668: 17,
8597: 18,
8252: 19,
// The two entries below stay disabled: 182 (¶) and 167 (§) are already keys
// in the upper area (mapped to 244 and 245), and a key can appear only once.
// 182: 20,
// 167: 21,
9644: 22,
8616: 23,
8593: 24,
8595: 25,
8594: 26,
8592: 27,
8735: 28,
8596: 29,
9650: 30,
9660: 31,
// upper area: byte values 127 through 255.
8962: 127,
199: 128,
252: 129,
233: 130,
226: 131,
228: 132,
224: 133,
229: 134,
231: 135,
234: 136,
235: 137,
232: 138,
239: 139,
238: 140,
236: 141,
196: 142,
197: 143,
201: 144,
230: 145,
198: 146,
244: 147,
246: 148,
242: 149,
251: 150,
249: 151,
255: 152,
214: 153,
220: 154,
248: 155,
163: 156,
216: 157,
215: 158,
402: 159,
225: 160,
237: 161,
243: 162,
250: 163,
241: 164,
209: 165,
170: 166,
186: 167,
191: 168,
174: 169,
172: 170,
189: 171,
188: 172,
161: 173,
171: 174,
187: 175,
9617: 176,
9618: 177,
9619: 178,
9474: 179,
9508: 180,
193: 181,
194: 182,
192: 183,
169: 184,
9571: 185,
9553: 186,
9559: 187,
9565: 188,
162: 189,
165: 190,
9488: 191,
9492: 192,
9524: 193,
9516: 194,
9500: 195,
9472: 196,
9532: 197,
227: 198,
195: 199,
9562: 200,
9556: 201,
9577: 202,
9574: 203,
9568: 204,
9552: 205,
9580: 206,
164: 207,
240: 208,
208: 209,
202: 210,
203: 211,
200: 212,
// Euro sign (U+20AC) at byte 0xD5 - the entry that distinguishes cp-858.
8364: 213,
205: 214,
206: 215,
207: 216,
9496: 217,
9484: 218,
9608: 219,
9604: 220,
166: 221,
204: 222,
9600: 223,
211: 224,
223: 225,
212: 226,
210: 227,
245: 228,
213: 229,
181: 230,
254: 231,
222: 232,
218: 233,
219: 234,
217: 235,
253: 236,
221: 237,
175: 238,
180: 239,
173: 240,
177: 241,
8215: 242,
190: 243,
182: 244,
167: 245,
247: 246,
184: 247,
176: 248,
168: 249,
183: 250,
185: 251,
179: 252,
178: 253,
9632: 254,
160: 255,
};
2 changes: 1 addition & 1 deletion pubspec.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: enough_convert
description: Support for character encodings / charsets / codecs missing from `dart:convert` - ISO 8859 / Latin, Windows, DOS, GBK, Big5, and KOI8 R/U.
version: 1.6.0
version: 1.6.0+cp858
homepage: https://github.com/Enough-Software/enough_convert

environment:
Expand Down
106 changes: 106 additions & 0 deletions test/dos/code_page_858_test.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// ignore_for_file: lines_longer_than_80_chars
// cSpell:disable

import 'dart:convert' as dart_convert;

import 'package:enough_convert/enough_convert.dart';
// import 'package:enough_convert/src/base.dart';
import 'package:test/test.dart';

/// Test suite for the cp-858 codec, decoder and encoder.
///
/// Besides the converter-level tests, it exercises the codec's own
/// `encode`/`decode` entry points and both values of `allowInvalid`.
void main() {
  group('Euro sign', () {
    // This is the key test for this code page, whose only difference from 850
    // is the support for the euro sign (that replaces the "ı" character).
    test('encode euro sign', () {
      final bytes = const CodePage858Encoder().convert('€');
      expect(bytes, [0xD5]);
      expect(const CodePage858Decoder().convert(bytes), '€');
    });

    // "ı" (U+0131) has no entry in the cp-858 map, so a strict encoder must
    // reject it just like any other unmapped character.
    test('dotless i is rejected when invalid input is not allowed', () {
      expect(() => const CodePage858Encoder().convert('ı'),
          throwsA(isA<FormatException>()));
    });
  });

  group('Codec tests', () {
    test('name', () {
      expect(const CodePage858Codec().name, 'cp-858');
      // Leftover helper calls, kept for reference (presumably used to
      // regenerate the encoding map from the decoder tables):
      // BaseEncoder.createEncodingMap(CodePage858Decoder().startBlock!, 0);
      // BaseEncoder.createEncodingMap(CodePage858Decoder().symbols, CodePage858Decoder().startIndex);
    });

    test('Decoder/encoder classes', () {
      expect(const CodePage858Codec().encoder, isA<CodePage858Encoder>());
      expect(const CodePage858Codec().decoder, isA<CodePage858Decoder>());
    });

    test('encode/decode through the codec', () {
      const codec = CodePage858Codec();
      expect(codec.encode('€'), [0xD5]);
      expect(codec.decode([0xD5]), '€');
    });

    test('codec is strict by default', () {
      const codec = CodePage858Codec();
      expect(codec.allowInvalid, false);
      expect(() => codec.decode([0xFF1]), throwsA(isA<FormatException>()));
      expect(() => codec.encode('�'), throwsA(isA<FormatException>()));
    });

    test('codec passes allowInvalid on to its converters', () {
      const codec = CodePage858Codec(allowInvalid: true);
      expect(codec.allowInvalid, true);
      expect(codec.decode([0x0C, 0xFF1]), '♀�');
      expect(codec.decode(codec.encode('ÄÖü�')), 'ÄÖü?');
    });
  });

  group('Decoder tests', () {
    test('Decode ascii', () {
      final bytes = dart_convert.ascii.encode('hello world');
      expect(const CodePage858Decoder().convert(bytes), 'hello world');
    });

    test('Decode cp-858', () {
      expect(
          const CodePage858Decoder().convert([0x0C, 0x0E, 0x7F, 0x9D]), '♀♫⌂Ø');
      final bytes = const CodePage858Encoder()
          .convert('hello world motörhead ruleß ok ô');
      expect(const CodePage858Decoder().convert(bytes),
          'hello world motörhead ruleß ok ô');
    });

    test('Decode cp-858 with invalid value when invalid input is allowed', () {
      // 0xFF1 does not fit into a byte and is therefore invalid input.
      expect(
          const CodePage858Decoder(allowInvalid: true)
              .convert([0x0C, 0x0E, 0x7F, 0x9D, 0xFF1]),
          '♀♫⌂Ø�');
    });

    test('Decode cp-858 with invalid value when invalid input is not allowed',
        () {
      expect(
          () => const CodePage858Decoder()
              .convert([0x0C, 0x0E, 0x7F, 0x9D, 0xFF1]),
          throwsA(isA<FormatException>()));
    });
  });

  group('Encoder tests', () {
    test('encode ascii', () {
      final bytes = const CodePage858Encoder().convert('hello world');
      expect(bytes, dart_convert.latin1.encode('hello world'));
    });

    test('encode 858', () {
      var bytes = const CodePage858Encoder().convert('♫⌂Ø');
      expect(bytes, [0x0E, 0x7F, 0x9D]);
      bytes =
          const CodePage858Encoder().convert('hello world motörhead ruleß ok');
      expect(const CodePage858Decoder().convert(bytes),
          'hello world motörhead ruleß ok');

      bytes = const CodePage858Encoder()
          .convert('‼¶§▬abcABC┴┬├─┼ãÃ╚╔╩╦╠═╬¤ðÐÊËÈ€ÍÎÏ┘┌█▄');
      expect(bytes.any((element) => element > 0xFF), false);
    });

    test('encode more cp-858 ', () {
      // The full upper area must encode to the consecutive bytes 127..255.
      var bytes = const CodePage858Encoder().convert(
          '⌂ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜø£Ø׃áíóúñѪº¿®¬½¼¡«»░▒▓│┤ÁÂÀ©╣║╗╝¢¥┐└┴┬├─┼ãÃ╚╔╩╦╠═╬¤ðÐÊËÈ€ÍÎÏ┘┌█▄¦Ì▀ÓßÔÒõÕµþÞÚÛÙýݯ´\u{00AD}±‗¾¶§÷¸°¨·¹³²■\u{00A0}');
      var expected = List.generate(255 - 126, (index) => index + 127);
      expect(bytes, expected);

      // A 14-character slice of that area: bytes 0xA1..0xAE.
      bytes = const CodePage858Encoder().convert('íóúñѪº¿®¬½¼¡«');
      expected = List.generate(0xAE - 0xA0, (index) => index + 0xA1);
      expect(bytes, expected);
    });

    test('encode cp-858 with invalid value when invalid input is allowed', () {
      final bytes =
          const CodePage858Encoder(allowInvalid: true).convert('ÄÖü�');
      expect(const CodePage858Decoder().convert(bytes), 'ÄÖü?');
    });

    test('encode cp-858 with invalid value when invalid input is not allowed',
        () {
      expect(() => const CodePage858Encoder().convert('ÄÖü�'),
          throwsA(isA<FormatException>()));
    });
  });
}