diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9c71cd6..27bad06 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,11 +18,10 @@ permissions: jobs: test: - runs-on: ubuntu-latest strategy: matrix: - ruby-version: ['2.2', '2.3', '2.4', '2.5', '2.6', '2.7', '3.0', '3.1', '3.2', '3.3'] + ruby-version: ['3.1', '3.2', '3.3'] steps: - uses: actions/checkout@v3 diff --git a/.rubocop.yml b/.rubocop.yml index 6ca5439..ff03480 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -2,73 +2,12 @@ require: - rubocop-rspec - rubocop-performance -Layout/CaseIndentation: - Enabled: false - -Layout/ElseAlignment: - Enabled: false - -Layout/EmptyLinesAroundBlockBody: - Enabled: false - -Layout/EndAlignment: - Enabled: true - EnforcedStyleAlignWith: start_of_line - -Layout/FirstArrayElementIndentation: - EnforcedStyle: consistent - -Layout/FirstHashElementIndentation: - EnforcedStyle: consistent - -Layout/LineLength: - Enabled: false - -Layout/MultilineHashBraceLayout: - Enabled: true - EnforcedStyle: symmetrical - -Layout/MultilineMethodCallIndentation: - EnforcedStyle: indented - -Layout/MultilineOperationIndentation: - EnforcedStyle: indented - -Layout/ParameterAlignment: - Enabled: true - EnforcedStyle: with_fixed_indentation - -Layout/SpaceInsideHashLiteralBraces: - EnforcedStyle: no_space - -Layout/SpaceInLambdaLiteral: - EnforcedStyle: require_space - -Layout/TrailingWhitespace: - Enabled: false - -Lint/AmbiguousBlockAssociation: - Enabled: false - -Lint/AssignmentInCondition: - Enabled: false - -Lint/RedundantSplatExpansion: +Gemspec/RequiredRubyVersion: Enabled: false Metrics/AbcSize: Enabled: false -Metrics/BlockLength: - CountComments: false - Enabled: true - Exclude: - - spec/**/*_spec.rb - Max: 100 - -Metrics/ClassLength: - Enabled: false - Metrics/CyclomaticComplexity: Max: 10 @@ -81,87 +20,17 @@ Metrics/ModuleLength: Metrics/PerceivedComplexity: Max: 10 -Naming/FileName: - Enabled: false - -Naming/MethodParameterName: - Enabled: false - -RSpec/AnyInstance: - Enabled: false - -RSpec/DescribeClass: - Exclude: - - spec/requests/**/*_spec.rb - -RSpec/DescribedClass: - Enabled: false - -RSpec/DescribeMethod: - Enabled: false - RSpec/ExampleLength: - Max: 25 - Exclude: - - spec/workers/**/*_spec.rb - -RSpec/ImplicitSubject: - Enabled: false - -RSpec/LetSetup: - Enabled: false - -RSpec/MessageSpies: Enabled: false RSpec/MultipleExpectations: Enabled: false -RSpec/NestedGroups: - Max: 5 - RSpec/NotToNot: EnforcedStyle: to_not -Security/YAMLLoad: - Exclude: - - app/models/entity.rb - -Style/Alias: - Enabled: false - -Style/AsciiComments: - Enabled: false - -Style/ClassAndModuleChildren: - Enabled: false - Style/Documentation: Enabled: false -Style/EmptyMethod: - EnforcedStyle: expanded - -Style/FormatStringToken: - Enabled: false - Style/FrozenStringLiteralComment: Enabled: false - -Style/Lambda: - Enabled: false - -Style/NumericPredicate: - Enabled: false - -Style/PerlBackrefs: - Enabled: false - -Style/RescueModifier: - Enabled: false - -Style/SafeNavigation: - Enabled: false - -Style/SymbolArray: - EnforcedStyle: brackets diff --git a/.ruby-version b/.ruby-version index be94e6f..15a2799 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -3.2.2 +3.3.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a4b56a..0eb7674 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 3.0.0 + +- Drop support for Rubies older than 3.1 + ## 2.0.2 - Fix issue with some non-combining characters followed by a valid character diff --git a/README.md b/README.md index 748566b..f9f8a0c 100644 --- a/README.md +++ b/README.md @@ -17,14 +17,9 @@ ANSEL provides character set conversion from ANSEL to UTF-8 ANSEL is compatible with the following Rubies: -* 2.2.x -* 2.3.x -* 2.4.x -* 2.5.x -* 2.7.x -* 3.0.x * 3.1.x * 3.2.x +* 3.3.x If you need ANSEL conversion in Ruby 1.8, see my [ansel_iconv](http://github.com/infused/ansel_iconv) project. diff --git a/ansel.gemspec b/ansel.gemspec index de04730..f3fc388 100644 --- a/ansel.gemspec +++ b/ansel.gemspec @@ -1,7 +1,7 @@ # encoding: ascii-8bit -lib = File.expand_path('../lib/', __FILE__) -$LOAD_PATH.unshift lib unless $:.include?(lib) +lib = File.expand_path('lib/', __dir__) +$LOAD_PATH.unshift lib unless $LOAD_PATH.include?(lib) require 'ansel/version' Gem::Specification.new do |s| @@ -12,6 +12,7 @@ Gem::Specification.new do |s| s.homepage = 'http://github.com/infused/ansel' s.summary = 'Convert ANSEL encoded text to UTF-8' s.description = 'Convert ANSEL encoded text to UTF-8' + s.license = 'MIT' s.rdoc_options = ['--charset=UTF-8'] s.extra_rdoc_files = ['README.md', 'CHANGELOG.md', 'LICENSE'] @@ -19,4 +20,5 @@ Gem::Specification.new do |s| s.require_paths = ['lib'] s.required_rubygems_version = '>= 1.3.0' + s.required_ruby_version = '>= 3.1.0' end diff --git a/lib/ansel/converter.rb b/lib/ansel/converter.rb index 1666b0a..0a137ff 100644 --- a/lib/ansel/converter.rb +++ b/lib/ansel/converter.rb @@ -18,7 +18,7 @@ def convert(string) scanner = StringScanner.new(string) until scanner.eos? byte = scanner.get_byte - char = byte.unpack('C')[0] + char = byte.unpack1('C') char_hex = char.to_s(16).upcase case char @@ -40,7 +40,7 @@ def convert(string) end else output << utf16_to_utf8(ANSI_TO_UTF16_MAP['ERR']) - scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte + scanner.get_byte if scanner.get_byte.unpack1('C') >= 0xE0 # ignore the next byte end end diff --git a/lib/ansel/version.rb b/lib/ansel/version.rb index b1c49b7..bac504e 100644 --- a/lib/ansel/version.rb +++ b/lib/ansel/version.rb @@ -1,5 +1,5 @@ # encoding: ascii-8bit module ANSEL - VERSION = '2.0.3' + VERSION = '3.0.0'.freeze end diff --git a/spec/ansel/converter_spec.rb b/spec/ansel/converter_spec.rb new file mode 100644 index 0000000..0dfa700 --- /dev/null +++ b/spec/ansel/converter_spec.rb @@ -0,0 +1,84 @@ +# encoding: ascii-8bit + +require 'spec_helper' + +describe ANSEL::Converter do + let(:ansel) { described_class.new } + + describe '#iconv' do + it 'does not convert ASCII characters' do + expect(ansel.convert("\x20")).to eq ' ' + expect(ansel.convert("\x78")).to eq 'x' + end + + it 'converts invalid characters to the unicode replacement character' do + expect(ansel.convert("\xBE\x00")).to eq '�'.force_encoding('utf-8') + expect(ansel.convert("\xD1\x00")).to eq '�'.force_encoding('utf-8') + end + + it 'converts valid ANSEL characters to UTF-8 equivalents' do + # ANSEL non-combining mappings + expect(ansel.convert("\x88\x00")).to eq '' + expect(ansel.convert("\x89\x00")).to eq '' + expect(ansel.convert("\x8D\x00")).to eq '' + expect(ansel.convert("\x8E\x00")).to eq '' + expect(ansel.convert("\xA1\x00")).to eq 'Ł'.force_encoding('utf-8') + expect(ansel.convert("\xA2\x00")).to eq 'Ø'.force_encoding('utf-8') + expect(ansel.convert("\xA3\x00")).to eq 'Đ'.force_encoding('utf-8') + expect(ansel.convert("\xA4\x00")).to eq 'Þ'.force_encoding('utf-8') + expect(ansel.convert("\xA5\x00")).to eq 'Æ'.force_encoding('utf-8') + expect(ansel.convert("\xA6\x00")).to eq 'Œ'.force_encoding('utf-8') + expect(ansel.convert("\xA7\x00")).to eq 'ʹ'.force_encoding('utf-8') + expect(ansel.convert("\xA8\x00")).to eq '·'.force_encoding('utf-8') + expect(ansel.convert("\xA9\x00")).to eq '♭'.force_encoding('utf-8') + expect(ansel.convert("\xAA\x00")).to eq '®'.force_encoding('utf-8') + expect(ansel.convert("\xAB\x00")).to eq '±'.force_encoding('utf-8') + expect(ansel.convert("\xAB\x00")).to eq '±'.force_encoding('utf-8') + expect(ansel.convert("\xAC\x00")).to eq 'Ơ'.force_encoding('utf-8') + expect(ansel.convert("\xAD\x00")).to eq 'Ư'.force_encoding('utf-8') + expect(ansel.convert("\xAE\x00")).to eq 'ʼ'.force_encoding('utf-8') + expect(ansel.convert("\xB0\x00")).to eq 'ʻ'.force_encoding('utf-8') + expect(ansel.convert("\xB1\x00")).to eq 'ł'.force_encoding('utf-8') + expect(ansel.convert("\xB2\x00")).to eq 'ø'.force_encoding('utf-8') + expect(ansel.convert("\xB3\x00")).to eq 'đ'.force_encoding('utf-8') + expect(ansel.convert("\xB4\x00")).to eq 'þ'.force_encoding('utf-8') + expect(ansel.convert("\xB5\x00")).to eq 'æ'.force_encoding('utf-8') + expect(ansel.convert("\xB6\x00")).to eq 'œ'.force_encoding('utf-8') + expect(ansel.convert("\xB7\x00")).to eq 'ʺ'.force_encoding('utf-8') + expect(ansel.convert("\xB8\x00")).to eq 'ı'.force_encoding('utf-8') + expect(ansel.convert("\xB9\x00")).to eq '£'.force_encoding('utf-8') + expect(ansel.convert("\xBA\x00")).to eq 'ð'.force_encoding('utf-8') + expect(ansel.convert("\xBC\x00")).to eq 'ơ'.force_encoding('utf-8') + expect(ansel.convert("\xBD\x00")).to eq 'ư'.force_encoding('utf-8') + expect(ansel.convert("\xC0\x00")).to eq '°'.force_encoding('utf-8') + expect(ansel.convert("\xC1\x00")).to eq 'ℓ'.force_encoding('utf-8') + expect(ansel.convert("\xC2\x00")).to eq '℗'.force_encoding('utf-8') + expect(ansel.convert("\xC3\x00")).to eq '©'.force_encoding('utf-8') + expect(ansel.convert("\xC4\x00")).to eq '♯'.force_encoding('utf-8') + expect(ansel.convert("\xC5\x00")).to eq '¿'.force_encoding('utf-8') + expect(ansel.convert("\xC6\x00")).to eq '¡'.force_encoding('utf-8') + expect(ansel.convert("\xC7\x00")).to eq 'ß'.force_encoding('utf-8') + expect(ansel.convert("\xC8\x00")).to eq '€'.force_encoding('utf-8') + + # ANSEL combining characters + expect(ansel.convert("\xE0\x41")).to eq 'Ả'.force_encoding('utf-8') + expect(ansel.convert("\xF6\x4C")).to eq 'Ḻ'.force_encoding('utf-8') + expect(ansel.convert("\xF6")).to eq '̲'.force_encoding('utf-8') + expect(ansel.convert("\xF9")).to eq '̮'.force_encoding('utf-8') + expect(ansel.convert("\xF9\x48")).to eq 'Ḫ'.force_encoding('utf-8') + expect(ansel.convert("\xF2\xE3\x41")).to eq 'Ậ'.force_encoding('utf-8') + expect(ansel.convert("\xF2\x79")).to eq 'ỵ'.force_encoding('utf-8') + expect(ansel.convert("\xF2")).to eq '̣'.force_encoding('utf-8') + + # Specific issues + expect(ansel.convert("\x4D\x65\x6C\xB2\x79")).to eq 'Meløy'.force_encoding('utf-8') + end + + it 'converts full text correctly' do + expect(ansel.convert('What is the question?')).to eq 'What is the question?' + expect(ansel.convert("\xC5\x00What is the question?")).to eq '¿What is the question?'.force_encoding('utf-8') + expect(ansel.convert("\xC3\x00 1994")).to eq '© 1994'.force_encoding('utf-8') + expect(ansel.convert("\xB9\x004.59")).to eq '£4.59'.force_encoding('utf-8') + end + end +end diff --git a/spec/ansel_converter_spec.rb b/spec/ansel_converter_spec.rb deleted file mode 100644 index ad01870..0000000 --- a/spec/ansel_converter_spec.rb +++ /dev/null @@ -1,86 +0,0 @@ -# encoding: ascii-8bit - -require 'spec_helper' - -describe ANSEL::Converter do - before do - @ansel = ANSEL::Converter.new - end - - describe '#iconv' do - it 'does not convert ASCII characters' do - expect(@ansel.convert("\x20")).to eq ' ' - expect(@ansel.convert("\x78")).to eq 'x' - end - - it 'converts invalid characters to the unicode replacement character' do - expect(@ansel.convert("\xBE\x00")).to eq '�'.force_encoding('utf-8') - expect(@ansel.convert("\xD1\x00")).to eq '�'.force_encoding('utf-8') - end - - it 'converts valid ANSEL characters to UTF-8 equivalents' do - # ANSEL non-combining mappings - expect(@ansel.convert("\x88\x00")).to eq '' - expect(@ansel.convert("\x89\x00")).to eq '' - expect(@ansel.convert("\x8D\x00")).to eq '' - expect(@ansel.convert("\x8E\x00")).to eq '' - expect(@ansel.convert("\xA1\x00")).to eq 'Ł'.force_encoding('utf-8') - expect(@ansel.convert("\xA2\x00")).to eq 'Ø'.force_encoding('utf-8') - expect(@ansel.convert("\xA3\x00")).to eq 'Đ'.force_encoding('utf-8') - expect(@ansel.convert("\xA4\x00")).to eq 'Þ'.force_encoding('utf-8') - expect(@ansel.convert("\xA5\x00")).to eq 'Æ'.force_encoding('utf-8') - expect(@ansel.convert("\xA6\x00")).to eq 'Œ'.force_encoding('utf-8') - expect(@ansel.convert("\xA7\x00")).to eq 'ʹ'.force_encoding('utf-8') - expect(@ansel.convert("\xA8\x00")).to eq '·'.force_encoding('utf-8') - expect(@ansel.convert("\xA9\x00")).to eq '♭'.force_encoding('utf-8') - expect(@ansel.convert("\xAA\x00")).to eq '®'.force_encoding('utf-8') - expect(@ansel.convert("\xAB\x00")).to eq '±'.force_encoding('utf-8') - expect(@ansel.convert("\xAB\x00")).to eq '±'.force_encoding('utf-8') - expect(@ansel.convert("\xAC\x00")).to eq 'Ơ'.force_encoding('utf-8') - expect(@ansel.convert("\xAD\x00")).to eq 'Ư'.force_encoding('utf-8') - expect(@ansel.convert("\xAE\x00")).to eq 'ʼ'.force_encoding('utf-8') - expect(@ansel.convert("\xB0\x00")).to eq 'ʻ'.force_encoding('utf-8') - expect(@ansel.convert("\xB1\x00")).to eq 'ł'.force_encoding('utf-8') - expect(@ansel.convert("\xB2\x00")).to eq 'ø'.force_encoding('utf-8') - expect(@ansel.convert("\xB3\x00")).to eq 'đ'.force_encoding('utf-8') - expect(@ansel.convert("\xB4\x00")).to eq 'þ'.force_encoding('utf-8') - expect(@ansel.convert("\xB5\x00")).to eq 'æ'.force_encoding('utf-8') - expect(@ansel.convert("\xB6\x00")).to eq 'œ'.force_encoding('utf-8') - expect(@ansel.convert("\xB7\x00")).to eq 'ʺ'.force_encoding('utf-8') - expect(@ansel.convert("\xB8\x00")).to eq 'ı'.force_encoding('utf-8') - expect(@ansel.convert("\xB9\x00")).to eq '£'.force_encoding('utf-8') - expect(@ansel.convert("\xBA\x00")).to eq 'ð'.force_encoding('utf-8') - expect(@ansel.convert("\xBC\x00")).to eq 'ơ'.force_encoding('utf-8') - expect(@ansel.convert("\xBD\x00")).to eq 'ư'.force_encoding('utf-8') - expect(@ansel.convert("\xC0\x00")).to eq '°'.force_encoding('utf-8') - expect(@ansel.convert("\xC1\x00")).to eq 'ℓ'.force_encoding('utf-8') - expect(@ansel.convert("\xC2\x00")).to eq '℗'.force_encoding('utf-8') - expect(@ansel.convert("\xC3\x00")).to eq '©'.force_encoding('utf-8') - expect(@ansel.convert("\xC4\x00")).to eq '♯'.force_encoding('utf-8') - expect(@ansel.convert("\xC5\x00")).to eq '¿'.force_encoding('utf-8') - expect(@ansel.convert("\xC6\x00")).to eq '¡'.force_encoding('utf-8') - expect(@ansel.convert("\xC7\x00")).to eq 'ß'.force_encoding('utf-8') - expect(@ansel.convert("\xC8\x00")).to eq '€'.force_encoding('utf-8') - - # ANSEL combining characters - expect(@ansel.convert("\xE0\x41")).to eq 'Ả'.force_encoding('utf-8') - expect(@ansel.convert("\xF6\x4C")).to eq 'Ḻ'.force_encoding('utf-8') - expect(@ansel.convert("\xF6")).to eq '̲'.force_encoding('utf-8') - expect(@ansel.convert("\xF9")).to eq '̮'.force_encoding('utf-8') - expect(@ansel.convert("\xF9\x48")).to eq 'Ḫ'.force_encoding('utf-8') - expect(@ansel.convert("\xF2\xE3\x41")).to eq 'Ậ'.force_encoding('utf-8') - expect(@ansel.convert("\xF2\x79")).to eq 'ỵ'.force_encoding('utf-8') - expect(@ansel.convert("\xF2")).to eq '̣'.force_encoding('utf-8') - - # Specific issues - expect(@ansel.convert("\x4D\x65\x6C\xB2\x79")).to eq 'Meløy'.force_encoding('utf-8') - end - - it 'converts full text correctly' do - expect(@ansel.convert('What is the question?')).to eq 'What is the question?' - expect(@ansel.convert("\xC5\x00What is the question?")).to eq '¿What is the question?'.force_encoding('utf-8') - expect(@ansel.convert("\xC3\x00 1994")).to eq '© 1994'.force_encoding('utf-8') - expect(@ansel.convert("\xB9\x004.59")).to eq '£4.59'.force_encoding('utf-8') - end - end -end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index b48470e..181bee2 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,6 +1,5 @@ # encoding: ascii-8bit -$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib/') require 'rubygems' require 'rspec' require 'ansel'