Initial version

with support for ISO 8859 1-16, Windows 1250, 1251 and 1252 character encodings
Enough-Software · Dec 9, 2020 · 7ef1217 · 7ef1217
1 parent ce9d1b6
commit 7ef1217
Show file tree

Hide file tree

Showing 46 changed files with 4,858 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,13 @@
+# Files and directories created by pub
+.dart_tool/
+.packages
+
+# Omit committing pubspec.lock for library packages:
+# https://dart.dev/guides/libraries/private-files#pubspeclock
+pubspec.lock
+
+# Conventional directory for build outputs
+build/
+
+# Directory created by dartdoc
+doc/api/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,3 @@
+## 0.9.0
+
+- Initial version with support for ISO 8859 1-16, Windows 1250, 1251 and 1252 character encodings 
diff --git a/README.md b/README.md
@@ -0,0 +1,61 @@
+Support for ISO 8859 / Latin and Windows character encodings missing from `dart:convert`.
+
+Supporting the following encodings:
+* Latin / ISO 8859 encodings:
+  * Latin 1 / ISO 8859-1
+  * Latin 2 / ISO 8859-2
+  * Latin 3 / ISO 8859-3
+  * Latin 4 / ISO 8859-4
+  * Latin 5 / ISO 8859-5
+  * Latin 6 / ISO 8859-6
+  * Latin 7 / ISO 8859-7
+  * Latin 8 / ISO 8859-8
+  * Latin 9 / ISO 8859-9
+  * Latin 10 / ISO 8859-10
+  * Latin 11 / ISO 8859-11
+  * Latin 13 / ISO 8859-13
+  * Latin 14 / ISO 8859-14
+  * Latin 15 / ISO 8859-15
+  * Latin 16 / ISO 8859-16
+* Windows Codepage Encodings:
+  * Windows-1250 / cp-1250
+  * Windows-1251 / cp-1251
+  * Windows-1252 / cp-1252
+
+
+## Usage
+
+Using `enough_convert` is pretty straightforward:
+
+```dart
+import 'package:enough_convert/enough_convert.dart';
+
+main() {
+  final codec = const Windows1252Codec(allowInvalid: false);
+  final input = 'Il faut être bête quand même.';
+  final encoded = codec.encode(input);
+  final decoded = codec.decode([...encoded]);
+  print('${codec.name}: encode "$input" to "$encoded"');
+  print('${codec.name}: decode $encoded to "$decoded"');
+
+}
+```
+
+## Installation
+Add this dependency to your `pubspec.yaml` file:
+
+```
+dependencies:
+  enough_convert: ^0.9.0
+```
+The latest version of `enough_convert` is [![enough_convert version](https://img.shields.io/pub/v/enough_convert.svg)](https://pub.dartlang.org/packages/enough_convert).
+
+
+## Features and bugs
+
+Please file feature requests and bugs at the [issue tracker][tracker].
+
+[tracker]: https://github.com/Enough-Software/enough_convert/issues
+
+## License
+`enough_convert` is licensed under the commercial-friendly [Mozilla Public License 2.0](LICENSE).
diff --git a/analysis_options.yaml b/analysis_options.yaml
@@ -0,0 +1,14 @@
+# Defines a default set of lint rules enforced for
+# projects at Google. For details and rationale,
+# see https://github.com/dart-lang/pedantic#enabled-lints.
+include: package:pedantic/analysis_options.yaml
+
+# For lint rules and documentation, see http://dart-lang.github.io/linter/lints.
+# Uncomment to specify additional rules.
+# linter:
+#   rules:
+#     - camel_case_types
+
+analyzer:
+#   exclude:
+#     - path/to/excluded/files/**
diff --git a/example/enough_convert_example.dart b/example/enough_convert_example.dart
@@ -0,0 +1,109 @@
+import 'dart:convert' as cnvrt;
+
+import 'package:enough_convert/enough_convert.dart';
+
+/// Runs every codec demo of this example, one after the other.
+void main() {
+  // The demos are listed in the order in which they are executed.
+  final demos = <void Function()>[
+    latin2,
+    latin3,
+    latin4,
+    latin5,
+    latin6,
+    latin7,
+    latin8,
+    latin9,
+    latin10,
+    latin11,
+    latin13,
+    latin14,
+    latin15,
+    latin16,
+    windows1250,
+    windows1251,
+    windows1252,
+  ];
+  for (final demo in demos) {
+    demo();
+  }
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin2Codec] and a sample phrase.
+void latin2() {
+  const codec = Latin2Codec(allowInvalid: false);
+  const phrase = 'Těší mě, že vás poznávám!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin3Codec] and a sample phrase.
+void latin3() {
+  const codec = Latin3Codec(allowInvalid: false);
+  const phrase = 'Tanıştığımıza memnun oldum!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin4Codec] and a sample phrase.
+void latin4() {
+  const codec = Latin4Codec(allowInvalid: false);
+  const phrase = 'Priecājos iepazīties!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin5Codec] and a sample phrase.
+void latin5() {
+  const codec = Latin5Codec(allowInvalid: false);
+  const phrase = 'Приятно встретиться!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin6Codec] and a sample phrase.
+void latin6() {
+  const codec = Latin6Codec(allowInvalid: false);
+  const phrase = 'سعدت بلقائك';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin7Codec] and a sample phrase.
+void latin7() {
+  const codec = Latin7Codec(allowInvalid: false);
+  const phrase = 'Χαίρομαι που σας γνωρίζω!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin8Codec] and a sample phrase.
+void latin8() {
+  const codec = Latin8Codec(allowInvalid: false);
+  const phrase = 'נעים להכיר אותך.נעים להכיר אותך-.';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin9Codec] and a sample phrase.
+void latin9() {
+  const codec = Latin9Codec(allowInvalid: false);
+  const phrase = 'Tanıştığımıza memnun oldum!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin10Codec] and a sample phrase.
+void latin10() {
+  const codec = Latin10Codec(allowInvalid: false);
+  const phrase = 'Hyggelig å møte deg!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin11Codec] and a sample phrase.
+void latin11() {
+  const codec = Latin11Codec(allowInvalid: false);
+  const phrase = 'ยินดีที่ได้พบคุณ!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin13Codec] and a sample phrase.
+void latin13() {
+  const codec = Latin13Codec(allowInvalid: false);
+  const phrase = 'Hyggelig å møte deg!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin14Codec] and a sample phrase.
+void latin14() {
+  const codec = Latin14Codec(allowInvalid: false);
+  const phrase = 'Má tú ag lorg cara gan locht, béidh tú gan cara go deo.';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin15Codec] and a sample phrase.
+void latin15() {
+  const codec = Latin15Codec(allowInvalid: false);
+  const phrase = 'Il faut être bête quand même.';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Latin16Codec] and a sample phrase.
+void latin16() {
+  const codec = Latin16Codec(allowInvalid: false);
+  const phrase = 'Örülök, hogy találkoztunk!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Windows1250Codec] and a sample phrase.
+void windows1250() {
+  const codec = Windows1250Codec(allowInvalid: false);
+  const phrase = 'Teší ma, že vás spoznávam!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Windows1251Codec] and a sample phrase.
+void windows1251() {
+  const codec = Windows1251Codec(allowInvalid: false);
+  const phrase = 'Радий познайомитися з Вами!';
+  roundtrip(codec, phrase);
+}
+
+/// Runs the [roundtrip] demo with a strict [Windows1252Codec] and a sample phrase.
+void windows1252() {
+  const codec = Windows1252Codec(allowInvalid: false);
+  const phrase = 'Il faut être bête quand même.';
+  roundtrip(codec, phrase);
+}
+
+/// Encodes [input] with [codec], decodes the result again and prints both
+/// steps together with the name of the codec.
+void roundtrip(cnvrt.Encoding codec, String input) {
+  final bytes = codec.encode(input);
+  // A copy is decoded, so that `bytes` is printed unchanged below even when
+  // the decoder modifies the list it receives.
+  final text = codec.decode(List<int>.of(bytes));
+  print('${codec.name}: encode "$input" to "$bytes"');
+  print('${codec.name}: decode $bytes to "$text"');
+}
diff --git a/lib/base.dart b/lib/base.dart
@@ -0,0 +1,121 @@
+import 'dart:convert' as cnvrt;
+
+/// Contains base classes for latin 2  to latin 16 / iso-8859-XX codecs
+
+/// Provides a simple, non chunkable 8bit decoder, e.g. for iso-8859-XX.
+///
+/// Byte values up to and including [startIndex] are decoded to the code point
+/// with the same value, higher byte values are looked up in [symbols].
+/// The decoder does not modify the data given in `convert(List<int> data)`,
+/// so unmodifiable lists and typed data such as `Uint8List` can be decoded
+/// directly and repeatedly.
+class BaseDecoder extends cnvrt.Converter<List<int>, String> {
+  /// The characters for the byte values above [startIndex], in byte order.
+  final String symbols;
+
+  /// The highest byte value that is decoded to the identical code point.
+  final int startIndex;
+
+  /// Whether values outside of `0x00`..`0xFF` are decoded to `�` (U+FFFD)
+  /// instead of causing a [FormatException].
+  final bool allowInvalid;
+
+  /// Creates a new 8bit decoder.
+  /// [symbols] contains the characters for all byte values above the specified [startIndex].
+  /// The length of the [symbols] need to be `255` / `0xFF` minus the [startIndex].
+  /// Set [allowInvalid] to true in case invalid characters sequences should be at least readable.
+  const BaseDecoder(this.symbols, this.startIndex, {this.allowInvalid = false})
+      : assert(symbols?.length == 255 - startIndex);
+
+  /// Decodes the [bytes] between [start] (inclusive) and [end] (exclusive).
+  ///
+  /// [end] defaults to the length of [bytes].
+  /// Throws a [RangeError] when [start] and [end] are not a valid range of
+  /// [bytes] and a [FormatException] when a value is outside of `0x00`..`0xFF`
+  /// while [allowInvalid] is `false`.
+  @override
+  String convert(List<int> bytes, [int start = 0, int end]) {
+    // checkValidRange either throws or returns the resolved, non-null end
+    end = RangeError.checkValidRange(start, end, bytes.length);
+    // Decode into a separate list: the input might be unmodifiable or a typed
+    // list that cannot store code units above 0xFF.
+    final charCodes = List<int>.filled(end - start, 0);
+    for (var i = start; i < end; i++) {
+      final byte = bytes[i];
+      var charCode = byte;
+      if ((byte & ~0xFF) != 0) {
+        // negative or larger than 8 bit
+        if (!allowInvalid) {
+          throw FormatException('Invalid value in input: $byte at position $i');
+        }
+        charCode = 0xFFFD; // unicode �
+      } else if (byte > startIndex) {
+        // the first symbol belongs to the byte value startIndex + 1
+        charCode = symbols.codeUnitAt(byte - (startIndex + 1));
+      }
+      charCodes[i - start] = charCode;
+    }
+    return String.fromCharCodes(charCodes);
+  }
+}
+
+/// Provides a simple, non chunkable 8bit encoder.
+///
+/// Runes up to and including [startIndex] are encoded as the identical value,
+/// higher runes are looked up in the [encodingMap].
+class BaseEncoder extends cnvrt.Converter<String, List<int>> {
+  /// Whether runes without a mapping are encoded as `?` (`0x3F`) instead of
+  /// causing a [FormatException].
+  final bool allowInvalid;
+
+  /// Maps the runes above [startIndex] to their encoded value.
+  final Map<int, int> encodingMap;
+
+  /// The highest rune that is encoded as the identical value.
+  final int startIndex;
+
+  /// Creates a new encoder.
+  /// Set [allowInvalid] to true in case invalid characters should be translated to question marks.
+  const BaseEncoder(this.encodingMap, this.startIndex,
+      {this.allowInvalid = false});
+
+  /// Static helper function to generate a conversion map from a symbols string.
+  ///
+  /// The first rune of [symbols] is mapped to the value `startIndex + 1`, the
+  /// second to `startIndex + 2` and so on. A `?` in [symbols] denotes an empty
+  /// slot that is skipped.
+  /// Prints a warning when [symbols] does not contain `255 - startIndex`
+  /// runes and prints every generated entry.
+  /// Throws a [FormatException] when a symbol occurs more than once.
+  static Map<int, int> createEncodingMap(String symbols, int startIndex) {
+    final runes = symbols.runes;
+    final map = <int, int>{};
+    var index = 0;
+    if (runes.length != 255 - startIndex) {
+      print(
+          'WARNING: there are not ${255 - startIndex} symbols but ${runes.length} runes in the specified map - is the given startIndex $startIndex correct?');
+    }
+    for (final rune in runes) {
+      if (rune != 0x3F) {
+        // "?" denote an empty slot in the map
+        final value = index + startIndex + 1;
+        // The values are strictly increasing and therefore always unique, a
+        // duplicate can only be a symbol that has been mapped before.
+        if (map.containsKey(rune)) {
+          final symbol = String.fromCharCode(rune);
+          final firstIndex = symbols.indexOf(symbol);
+          final lastIndex = symbols.lastIndexOf(symbol);
+          throw FormatException(
+              'Duplicate value $value for isoSymbols "$symbol" at index $index - in symbols to found at $firstIndex and $lastIndex');
+        }
+        map[rune] = value;
+        // printed as `rune: value,` - presumably to paste the output into a
+        // map literal
+        print('\t$rune: $value,');
+      }
+      index++;
+    }
+    return map;
+  }
+
+  /// Encodes the runes of [input] between [start] (inclusive) and [end]
+  /// (exclusive).
+  ///
+  /// [start] and [end] refer to the runes of [input], [end] defaults to the
+  /// number of runes.
+  /// Throws a [RangeError] when [start] and [end] are not a valid range and a
+  /// [FormatException] when a rune above [startIndex] has no mapping while
+  /// [allowInvalid] is `false`.
+  @override
+  List<int> convert(String input, [int start = 0, int end]) {
+    // materialize the runes once instead of walking the string twice
+    var runesList = input.runes.toList(growable: false);
+    // checkValidRange either throws or returns the resolved, non-null end
+    end = RangeError.checkValidRange(start, end, runesList.length);
+    if (start > 0 || end < runesList.length) {
+      runesList = runesList.sublist(start, end);
+    }
+    for (var i = 0; i < runesList.length; i++) {
+      final rune = runesList[i];
+      if (rune <= startIndex) {
+        // encoded as the identical value
+        continue;
+      }
+      final value = encodingMap[rune];
+      if (value != null) {
+        runesList[i] = value;
+      } else if (allowInvalid) {
+        runesList[i] = 0x3F; // ?
+      } else {
+        throw FormatException(
+            'Invalid value in input: ${String.fromCharCode(rune)} ($rune) at $i');
+      }
+    }
+    return runesList;
+  }
+}
diff --git a/lib/enough_convert.dart b/lib/enough_convert.dart
@@ -0,0 +1,24 @@
+/// Provides missing common character encoders / decoders / codecs / encodings for Dart.
+library enough_convert;
+
+export 'base.dart';
+export 'latin/latin.dart';
+export 'latin/latin1.dart';
+export 'latin/latin2.dart';
+export 'latin/latin3.dart';
+export 'latin/latin4.dart';
+export 'latin/latin5.dart';
+export 'latin/latin6.dart';
+export 'latin/latin7.dart';
+export 'latin/latin8.dart';
+export 'latin/latin9.dart';
+export 'latin/latin10.dart';
+export 'latin/latin11.dart';
+export 'latin/latin13.dart';
+export 'latin/latin14.dart';
+export 'latin/latin15.dart';
+export 'latin/latin16.dart';
+export 'windows/windows.dart';
+export 'windows/windows1250.dart';
+export 'windows/windows1251.dart';
+export 'windows/windows1252.dart';
diff --git a/lib/latin/latin.dart b/lib/latin/latin.dart
@@ -0,0 +1,25 @@
+import 'package:enough_convert/base.dart';
+
+/// Contains base classes for latin 2  to latin 16 / iso-8859-XX codecs
+
+/// Provides a simple, non chunkable iso-8859-XX  decoder.
+/// Note that the decoder may directly modify the data given in `convert(List<int> data)`,
+/// in doubt create a new array first, e.g.
+/// ```dart
+/// decoder.convert([...data]);
+/// ```
+class LatinDecoder extends BaseDecoder {
+  /// Creates a new latin / iso-8859-XX decoder.
+  /// The [symbols] need to be exactly `95` characters long.
+  /// Set [allowInvalid] to true in case invalid characters sequences should be at least readable.
+  const LatinDecoder(String symbols, {bool allowInvalid = false})
+      // 0xA0 is handed to the base class as its startIndex
+      : super(symbols, 0xA0, allowInvalid: allowInvalid);
+}
+
+/// Provides a simple, non chunkable iso-8859-XX encoder.
+class LatinEncoder extends BaseEncoder {
+  /// Creates a new latin / iso-8859-XX encoder.
+  /// The [encodingMap] maps runes to their encoded values.
+  /// Set [allowInvalid] to true in case invalid characters should be translated to question marks.
+  const LatinEncoder(Map<int, int> encodingMap, {bool allowInvalid = false})
+      // 0xA0 is handed to the base class as its startIndex
+      : super(encodingMap, 0xA0, allowInvalid: allowInvalid);
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		## 0.9.0

		- Initial version with support for ISO 8859 1-16, Windows 1250, 1251 and 1252 character encodings