Skip to content

Commit

Permalink
Merge pull request #77 from robotdana/dev
Browse files Browse the repository at this point in the history
Use Encoding.default_external to handle encoding
  • Loading branch information
robotdana authored Jun 10, 2021
2 parents f068735 + 02c2dac commit 775db18
Show file tree
Hide file tree
Showing 21 changed files with 67 additions and 38 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# v0.9.1
- Assume all files are utf8, more comprehensively. (Sets ::Encoding.default_external and default_internal while running)

# v0.9.0
- Recognize url with _ in query string and zero length path
- Assume all files are utf8
Expand Down
3 changes: 3 additions & 0 deletions bin/clean
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

# frozen_string_literal: true

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

require_relative 'generate/util/write'

unless ARGV[0]
Expand Down
3 changes: 3 additions & 0 deletions bin/console
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

require 'bundler/setup'
require 'spellr'
require 'pry'
Expand Down
3 changes: 3 additions & 0 deletions bin/generate/css
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

# frozen_string_literal: true

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

require_relative 'util/mdn_words'

ADDITIONAL_WORDS = [
Expand Down
3 changes: 3 additions & 0 deletions bin/generate/english
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

require 'net/http'
require 'fileutils'
require_relative 'util/write'
Expand Down
3 changes: 3 additions & 0 deletions bin/generate/html
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

# frozen_string_literal: true

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

require_relative 'util/mdn_words'
require_relative 'util/fetch'

Expand Down
3 changes: 3 additions & 0 deletions bin/generate/ruby
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

require_relative 'util/write'
require 'set'

Expand Down
7 changes: 3 additions & 4 deletions bin/generate/util/write.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

require 'pathname'
require_relative '../../../lib/spellr/wordlist'
require_relative '../../../lib/spellr/stringio_with_encoding'

module Write
OUTPUT_DIR = Pathname.new(
Expand All @@ -16,11 +15,11 @@ def wordlist_path(name)
def write_wordlist(words, name)
wordlist_path(name).parent.mkpath
Spellr::Wordlist.new(wordlist_path(name))
.clean(::Spellr::StringIOWithEncoding.new(words.force_encoding(::Encoding::UTF_8)))
.clean(::StringIO.new(words.force_encoding(::Encoding::UTF_8)))
end

def append_wordlist(words, name)
old_words = wordlist_path(name).read(encoding: ::Encoding::UTF_8) if wordlist_path(name).exist?
old_words = wordlist_path(name).read if wordlist_path(name).exist?
write_wordlist("#{words}\n#{old_words}".dup, name)
end

Expand All @@ -30,6 +29,6 @@ def license_path(name, ext = '.txt')

def write_license(license, name, ext = '.txt')
license_path(name).parent.mkpath
license_path(name, ext).write(license, encoding: ::Encoding::UTF_8)
license_path(name, ext).write(license)
end
end
3 changes: 3 additions & 0 deletions bin/possible_key_data/train
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

require 'pathname'
require_relative '../../lib/spellr/key_tuner/possible_key'
require_relative '../../lib/spellr/key_tuner/stats'
Expand Down
3 changes: 3 additions & 0 deletions bin/time
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# use mac terminal not vs code terminal
# ensure nothing else is watching that dir in the filesystem e.g. webpack

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

require 'open3'
require 'shellwords'
RUNS = 10
Expand Down
3 changes: 3 additions & 0 deletions exe/spellr
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@

require_relative '../lib/spellr/cli'

::Encoding.default_external = ::Encoding::UTF_8
::Encoding.default_internal = ::Encoding::UTF_8

exit Spellr::CLI.new(ARGV).run
2 changes: 1 addition & 1 deletion lib/spellr/config_loader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def load_config
def load_yaml(path)
return {} unless ::File.exist?(path)

YAML.safe_load(::File.read(path, encoding: ::Encoding::UTF_8), symbolize_names: true)
YAML.safe_load(::File.read(path), symbolize_names: true)
end

def merge_config(default, project) # rubocop:disable Metrics/MethodLength
Expand Down
2 changes: 1 addition & 1 deletion lib/spellr/file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def first_line # rubocop:disable Metrics/MethodLength
end

def read_write
write(yield(read(encoding: ::Encoding::UTF_8)), encoding: ::Encoding::UTF_8)
write(yield read)
end
end
end
22 changes: 17 additions & 5 deletions lib/spellr/rake_task.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# frozen_string_literal: true

require 'rake'
require 'spellr/cli'
require 'shellwords'

module Spellr
Expand Down Expand Up @@ -36,11 +35,13 @@ def describe_task
desc("Run spellr (default args: #{escaped_argv})")
end

def define_task
def define_task # rubocop:disable Metrics/MethodLength
task(@name, :'*args') do |_, task_argv|
argv = argv_or_default(task_argv)
write_cli_cmd(argv)
run(argv)
with_utf_8 do
argv = argv_or_default(task_argv)
write_cli_cmd(argv)
run(argv)
end
end
end

Expand All @@ -49,6 +50,7 @@ def write_cli_cmd(argv)
end

# Runs the spellr CLI with the given arguments and terminates the process
# with exit status 1 when the CLI reports a non-zero status.
#
# @param argv [Array<String>] arguments handed to Spellr::CLI.new
# @return [nil] when the CLI status is 0
def run(argv)
  # NOTE(review): the CLI is required here rather than at file load —
  # presumably so it is only loaded when the task actually runs; confirm.
  require 'spellr/cli'

  cli_status = Spellr::CLI.new(argv).run
  return if cli_status == 0

  exit 1
end
Expand All @@ -57,5 +59,15 @@ def argv_or_default(task_argv)
task_argv = task_argv.to_a.compact
task_argv.empty? ? @default_argv : task_argv
end

# Runs the given block with ::Encoding.default_external and
# ::Encoding.default_internal temporarily set to UTF-8, then puts the
# previous values back.
#
# The restore happens in an `ensure` so it also runs when the block raises
# or calls `exit` (which raises SystemExit); otherwise the process-wide
# encoding defaults would stay overridden for whatever runs afterwards.
#
# @yield the work to perform while the defaults are UTF-8
# @return [Object] the value returned by the block
def with_utf_8 # rubocop:disable Metrics/MethodLength
  old_default_external = ::Encoding.default_external
  old_default_internal = ::Encoding.default_internal

  begin
    ::Encoding.default_external = ::Encoding::UTF_8
    ::Encoding.default_internal = ::Encoding::UTF_8
    yield
  ensure
    # Captured before the begin, so these are always the real previous values.
    ::Encoding.default_external = old_default_external
    ::Encoding.default_internal = old_default_internal
  end
end
end
end
11 changes: 0 additions & 11 deletions lib/spellr/stringio_with_encoding.rb

This file was deleted.

14 changes: 6 additions & 8 deletions lib/spellr/tokenizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def map(&block)
end

def each_term(&block)
file.each_line(encoding: ::Encoding::UTF_8) do |line|
file.each_line do |line|
prepare_tokenizer_for_line(line)&.each_term(&block)
end
ensure
Expand Down Expand Up @@ -56,14 +56,12 @@ def each_line_with_stats # rubocop:disable Metrics/MethodLength
char_offset = @start_at.line_location.char_offset
byte_offset = @start_at.line_location.byte_offset

file
.each_line(encoding: ::Encoding::UTF_8)
.with_index(@start_at.line_location.line_number) do |line, line_number|
yield line, line_number, char_offset, byte_offset
file.each_line.with_index(@start_at.line_location.line_number) do |line, line_number|
yield line, line_number, char_offset, byte_offset

char_offset += line.length
byte_offset += line.bytesize
end
char_offset += line.length
byte_offset += line.bytesize
end
ensure
file.close
end
Expand Down
2 changes: 1 addition & 1 deletion lib/spellr/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Spellr
VERSION = '0.9.0'
VERSION = '0.9.1'
end
6 changes: 3 additions & 3 deletions lib/spellr/wordlist.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def <<(term)
touch
@include[term] = true
insert_sorted(term)
@path.write(words.join, encoding: ::Encoding::UTF_8) # we don't need to clear the cache
@path.write(words.join) # we don't need to clear the cache
end

def words
Expand All @@ -55,7 +55,7 @@ def clean(file = @path)
end

def write(content)
@path.write(content, encoding: ::Encoding::UTF_8)
@path.write(content)

clear_cache
end
Expand All @@ -70,7 +70,7 @@ def touch
return if exist?

@path.dirname.mkpath
@path.write('', encoding: ::Encoding::UTF_8)
@path.write('')
clear_cache
end

Expand Down
2 changes: 2 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

::Encoding.default_external = ::Encoding::UTF_8

require 'fileutils'
require 'pathname'
FileUtils.rm_rf(File.join(__dir__, '..', 'coverage'))
Expand Down
2 changes: 1 addition & 1 deletion spec/support/stub_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def stub_fs_file_list(filenames)
def stub_fs_file(filename, body = '')
path = Spellr.pwd.join(filename)
path.parent.mkpath
path.write(body, encoding: ::Encoding::UTF_8)
path.write(body)
path
end
end
Expand Down
5 changes: 2 additions & 3 deletions spec/tokenizer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@

require 'spec_helper'
require_relative '../lib/spellr/tokenizer'
require_relative '../lib/spellr/stringio_with_encoding'

RSpec::Matchers.define :have_tokens do |*expected|
match do |actual|
@actual = ::Spellr::Tokenizer.new(::Spellr::StringIOWithEncoding.new(actual)).terms
@actual = ::Spellr::Tokenizer.new(::StringIO.new(actual)).terms
expect(@actual).to match(expected)
end

Expand All @@ -16,7 +15,7 @@

RSpec::Matchers.define :have_token_positions do |*expected|
match do |actual|
@actual = Spellr::Tokenizer.new(::Spellr::StringIOWithEncoding.new(actual)).map(&:coordinates)
@actual = Spellr::Tokenizer.new(::StringIO.new(actual)).map(&:coordinates)
expect(@actual).to match(expected)
end

Expand Down

0 comments on commit 775db18

Please sign in to comment.