From 0fe1b334ae299da45975be5dc8062fbe14513af0 Mon Sep 17 00:00:00 2001 From: Dana Sherson Date: Sat, 12 Jun 2021 17:51:42 +1000 Subject: [PATCH] Add --prune-wordlists option --- .rubocop.yml | 3 + .spellr_wordlists/english.txt | 75 ++------------ .spellr_wordlists/lorem.txt | 3 - .spellr_wordlists/ruby.txt | 6 -- CHANGELOG.md | 5 +- README.md | 1 + lib/spellr/cli.rb | 21 ++-- lib/spellr/cli_options.rb | 17 ++- lib/spellr/config.rb | 12 ++- lib/spellr/config_validator.rb | 22 +++- lib/spellr/prune.rb | 80 ++++++++++++++ lib/spellr/version.rb | 2 +- lib/spellr/wordlist.rb | 25 +++-- lib/spellr/wordlist_reporter.rb | 2 - spec/feature_spec.rb | 178 +++++++++++++++++++++++++++++--- spec/prune_spec.rb | 92 +++++++++++++++++ spec/spellr_spec.rb | 20 ++++ spec/wordlist_spec.rb | 8 +- 18 files changed, 454 insertions(+), 118 deletions(-) create mode 100644 lib/spellr/prune.rb create mode 100644 spec/prune_spec.rb diff --git a/.rubocop.yml b/.rubocop.yml index de55219..df668e6 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -158,6 +158,9 @@ RSpec/MultipleExpectations: RSpec/NamedSubject: Enabled: false +RSpec/NestedGroups: + Enabled: false + # This matches the style we've been using all along (ever so slightly more frequently) Style/Alias: diff --git a/.spellr_wordlists/english.txt b/.spellr_wordlists/english.txt index 5ede995..d9a5119 100644 --- a/.spellr_wordlists/english.txt +++ b/.spellr_wordlists/english.txt @@ -1,59 +1,41 @@ -abc -addable alnum -arg arglist -args backports baz behaviour beihang -bundler capfile changelog -charpos cli -cmd codebase -codebases colours config configs -css ctrl customisations cyclomatic -def desaturate -dict diffable -diffs -docker dockerfile dockerhub dockerignore -downloader editmsg elp +entrya +entryb +entryc +entryd +entrye +entryf +entryg env -eos eot eplace erb ercim esque -exclusions -exe -executables -exitstatus -ext -filename -filenames -foo gemfile gemspec -getch -git github gitignore gitignored @@ -64,24 +46,18 @@ gtm haml hardcoded hashbang -hashbangs -hashbangs -heuristically hml hostnames -href htt hyperwallet i'll i'm ico -inclusions ise ize javascript jbuilder jpg -jruby json jsx keio @@ -89,79 +65,47 @@ keydata keypress khz klass -kwargs localhost logstash -lstripped -mailto -marketplacer -matcher -matchers maths mdn memoized -merchantability multibyte -multiline -newlines noninfringement -nonwords notaword num -optparse -org -param params png prg -punycode pwd rakefile rdoc readme -redisplay regexps -repo rspec rubocop rubygems rvm scss sendgrid -shelljoin -shellsplit -shellwords sherson simplecov -stderr -stdin stdlib -stdout -str -stringscanner -strscan -struct -sublicense -subwords +subcommand sudo superselector svg symlinks -thu tlds tmp todo -tokenize tokenizer tokenizes tsx ttf txt unitless -unrecognised -unscan uploader -uri urls usr utf @@ -170,12 +114,9 @@ wai webpack woff wordlist -wordlists wordn't wtf -xdescribe xit xlsx -yardoc yml zsh diff --git a/.spellr_wordlists/lorem.txt b/.spellr_wordlists/lorem.txt index fbb4c5e..91b8cfc 100644 --- a/.spellr_wordlists/lorem.txt +++ b/.spellr_wordlists/lorem.txt @@ -1,8 +1,5 @@ amet dolar dolares -dolor -elp -eplace ipsum lorem diff --git a/.spellr_wordlists/ruby.txt b/.spellr_wordlists/ruby.txt index 1aaa50b..27391a4 100644 --- a/.spellr_wordlists/ruby.txt +++ b/.spellr_wordlists/ruby.txt @@ -1,11 +1,5 @@ cov -kwarg -matchdata nocov nokogiri -pty rubo -simplecov -subclasses -vars webmock diff --git a/CHANGELOG.md b/CHANGELOG.md index 9036ace..21ec9db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ +# v0.10.0 +- add `spellr --prune-wordlists` to remove unused terms. + # v0.9.1 -- Assume all files are utf8, more comprehensively. (Sets ::Encoding.default_external and default_internal while running) +- Assume all files are utf8, more comprehensively. (Sets `::Encoding.default_external` and `.default_internal` while running) # v0.9.0 - Recognize url with _ in query string and zero length path diff --git a/README.md b/README.md index e764a45..f60f5ea 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ $ spellr --wordlist '*.rb' '*_test.js' There are some support commands available: ```bash +$ spellr --prune-wordlists # after succeeding, remove unused words from the .spellr_wordlists/ files $ spellr --dry-run # list files that will be checked $ spellr --version # for the current version $ spellr --help # for the list of flags available diff --git a/lib/spellr/cli.rb b/lib/spellr/cli.rb index fb1d446..173bd72 100644 --- a/lib/spellr/cli.rb +++ b/lib/spellr/cli.rb @@ -13,26 +13,33 @@ def initialize(argv) end def run - catch(:spellr_exit) { check } + catch(:spellr_exit) { run_subcommand } rescue Spellr::Error => e - Spellr.config.output.warn(Spellr::StringFormat.red(e.message)) && 1 + Spellr.config.output.warn(Spellr::StringFormat.red(e.message)) 1 end private - def check + def run_subcommand CLI::Options.parse(@argv) Spellr.config.valid? - checker = Spellr.config.checker.new(files: files) + exit_code = check + exit_code = prune if exit_code.zero? && Spellr.config.prune_wordlists? + exit_code + end + + def check + checker = Spellr.config.checker.new(files: Spellr.config.file_list) checker.check checker.exit_code end - def files - require_relative 'file_list' - Spellr::FileList.new(@argv) + def prune + require_relative 'prune' + Spellr.config.output.puts '' + Spellr::Prune.run end end end diff --git a/lib/spellr/cli_options.rb b/lib/spellr/cli_options.rb index 6bdf48c..327a3de 100644 --- a/lib/spellr/cli_options.rb +++ b/lib/spellr/cli_options.rb @@ -12,6 +12,8 @@ def parse(argv) @parallel_option = false options.parse!(argv) + + Spellr.config.file_list_patterns = argv end private @@ -20,7 +22,7 @@ def parse(argv) def options # rubocop:disable Metrics/MethodLength, Metrics/AbcSize opts = OptionParser.new - opts.banner = 'Usage: spellr [options] [files]' + opts.banner = 'Usage: spellr [options] [file patterns]' opts.separator('') opts.on('-w', '--wordlist', 'Outputs errors in wordlist format', &method(:wordlist_option)) opts.on('-q', '--quiet', 'Silences output', &method(:quiet_option)) @@ -28,9 +30,12 @@ def options # rubocop:disable Metrics/MethodLength, Metrics/AbcSize opts.separator('') opts.on('--[no-]parallel', 'Run in parallel or not, default --parallel', &method(:parallel_option)) opts.on('-d', '--dry-run', 'List files to be checked', &method(:dry_run_option)) - opts.on('-f', '--suppress-file-rules', <<~HELP, &method(:suppress_file_rules)) + opts.on('-f', '--suppress-file-rules', <<~HELP, &method(:suppress_file_rules_option)) Suppress all configured, default, and gitignore include and exclude patterns HELP + opts.on('--prune-wordlists', <<~HELP, &method(:prune_wordlists_option)) + Prune unused words from .spellr_wordlists/*.txt after checking. + HELP opts.separator('') opts.on('-c', '--config FILENAME', String, <<~HELP, &method(:config_option)) Path to the config file (default ./.spellr.yml) @@ -49,6 +54,8 @@ def wordlist_option(_) def quiet_option(_) require_relative 'quiet_reporter' + require_relative 'output_stubbed' + Spellr.config.output = Spellr::OutputStubbed.new Spellr.config.reporter = Spellr::QuietReporter.new end @@ -59,10 +66,14 @@ def interactive_option(_) Spellr.config.checker = Spellr::CheckInteractive unless @parallel_option end - def suppress_file_rules(_) + def suppress_file_rules_option(_) Spellr.config.suppress_file_rules = true end + def prune_wordlists_option(_) + Spellr.config.prune_wordlists = true + end + def config_option(file) file = Spellr.pwd.join(file).expand_path diff --git a/lib/spellr/config.rb b/lib/spellr/config.rb index cbe4978..a46322b 100644 --- a/lib/spellr/config.rb +++ b/lib/spellr/config.rb @@ -10,12 +10,13 @@ module Spellr class Config - attr_writer :reporter, :checker + attr_writer :reporter, :checker, :output - attr_accessor :suppress_file_rules, :dry_run + attr_accessor :suppress_file_rules, :dry_run, :prune_wordlists, :file_list_patterns attr_reader :config_file alias_method :dry_run?, :dry_run + alias_method :prune_wordlists?, :prune_wordlists def initialize @config = ConfigLoader.new @@ -88,6 +89,13 @@ def reset! # rubocop:disable Metrics/MethodLength remove_instance_variable(:@key_minimum_length) if defined?(@key_minimum_length) end + def file_list + @file_list ||= begin + require_relative 'file_list' + Spellr::FileList.new(file_list_patterns) + end + end + private def dry_run_checker diff --git a/lib/spellr/config_validator.rb b/lib/spellr/config_validator.rb index 815cf9e..f591bb8 100644 --- a/lib/spellr/config_validator.rb +++ b/lib/spellr/config_validator.rb @@ -13,6 +13,8 @@ class ConfigValidator validate :only_has_one_key_per_language validate :languages_with_conflicting_keys validate :keys_are_single_characters + validate :prune_wordlists_with_no_argv_patterns + validate :prune_wordlists_with_no_dry_run def valid? raise ::Spellr::Config::Invalid, errors.join("\n") unless super @@ -32,10 +34,10 @@ def interactive_is_interactive # rubocop:disable Metrics/MethodLength end def checker_and_reporter_coexist - if Spellr.config.reporter.class.name == 'Spellr::Interactive' && - Spellr.config.checker.name == 'Spellr::CheckParallel' - errors << 'CLI error: --interactive is incompatible with --parallel' - end + return unless Spellr.config.reporter.class.name == 'Spellr::Interactive' && + Spellr.config.checker.name == 'Spellr::CheckParallel' + + errors << 'CLI error: --interactive is incompatible with --parallel' end def only_has_one_key_per_language @@ -45,6 +47,18 @@ def only_has_one_key_per_language end end + def prune_wordlists_with_no_argv_patterns + return unless Spellr.config.prune_wordlists? && !Spellr.config.file_list_patterns.empty? + + errors << 'CLI error: --prune-wordlists is incompatible with file patterns' + end + + def prune_wordlists_with_no_dry_run + return unless Spellr.config.prune_wordlists? && Spellr.config.dry_run? + + errors << 'CLI error: --prune-wordlists is incompatible with --dry-run' + end + def languages_with_conflicting_keys Spellr.config.languages.select(&:addable?).group_by(&:key).values.select do |g| g.length > 1 diff --git a/lib/spellr/prune.rb b/lib/spellr/prune.rb new file mode 100644 index 0000000..79f4f15 --- /dev/null +++ b/lib/spellr/prune.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require_relative 'wordlist_reporter' +require_relative 'output_stubbed' +require_relative 'file_list' +require_relative 'check_parallel' +require_relative 'string_format' +require_relative 'reporter' + +module Spellr + class Prune + class << self + def run + prunable_wordlists.each do |wordlist| + prune_wordlist(wordlist) + end + + 0 + end + + private + + def prune_wordlist(wordlist) + report_start(wordlist) + prepare_wordlist(wordlist) + + reporter = check_with_independent_reporter + report_pruned_count(wordlist, reporter) + update_wordlist(wordlist, reporter) + end + + def prepare_wordlist(wordlist) + wordlist.force_nonexistence + end + + def check_with_independent_reporter + reporter = ::Spellr::WordlistReporter.new(Spellr::OutputStubbed.new) + ::Spellr.config.checker.new(files: files, reporter: reporter).check + reporter + end + + def update_wordlist(wordlist, reporter) + if reporter.words.empty? + wordlist.delete + elsif reporter.words.length < wordlist.length + wordlist.write(reporter.words.sort.join) + end + end + + def report_start(wordlist) + print "pruning: #{wordlist.path.basename}" + end + + def report_pruned_count(wordlist, reporter) + wordlist.clear_cache + count = wordlist.length - reporter.words.length + + puts "\rpruned: #{wordlist.path.basename} #{StringFormat.pluralize('word', count)} removed" + end + + def puts(string) + ::Spellr.config.output.puts(string) + end + + def print(string) + ::Spellr.config.output.print(string) + end + + def prunable_wordlists + Spellr.config.languages.select { |l| l.project_wordlist.exist? }.sort_by do |language| + files.count { |file| language.matches?(file) } + end.reverse.map(&:project_wordlist) + end + + def files + @files ||= Spellr::FileList.new + end + end + end +end diff --git a/lib/spellr/version.rb b/lib/spellr/version.rb index 8d9eed9..0e09d8b 100644 --- a/lib/spellr/version.rb +++ b/lib/spellr/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Spellr - VERSION = '0.9.1' + VERSION = '0.10.0' end diff --git a/lib/spellr/wordlist.rb b/lib/spellr/wordlist.rb index 97f8102..47e8e44 100644 --- a/lib/spellr/wordlist.rb +++ b/lib/spellr/wordlist.rb @@ -54,6 +54,12 @@ def clean(file = @path) write(Spellr::Tokenizer.new(file, skip_key: false).normalized_terms.join) end + def force_nonexistence + clear_cache + + @exist = false + end + def write(content) @path.write(content) @@ -66,6 +72,13 @@ def exist? @exist = @path.exist? end + def delete + return unless exist? + + @path.delete + clear_cache + end + def touch return if exist? @@ -78,17 +91,17 @@ def length to_a.length end + def clear_cache + @words = nil + @include = {} + remove_instance_variable(:@exist) if defined?(@exist) + end + private def insert_sorted(term) insert_at = words.bsearch_index { |value| value >= term } insert_at ? words.insert(insert_at, term) : words.push(term) end - - def clear_cache - @words = nil - @include = {} - remove_instance_variable(:@exist) if defined?(@exist) - end end end diff --git a/lib/spellr/wordlist_reporter.rb b/lib/spellr/wordlist_reporter.rb index c3cec2d..b13457c 100644 --- a/lib/spellr/wordlist_reporter.rb +++ b/lib/spellr/wordlist_reporter.rb @@ -13,8 +13,6 @@ def call(token) words << token.spellr_normalize end - private - def words @words ||= begin output.counts[:words] = Set.new unless output.counts.key?(:words) diff --git a/spec/feature_spec.rb b/spec/feature_spec.rb index 6b1cb20..69761da 100644 --- a/spec/feature_spec.rb +++ b/spec/feature_spec.rb @@ -7,7 +7,7 @@ spellr('--help') expect(stdout).to have_output <<~HELP - Usage: spellr [options] [files] + Usage: spellr [options] [file patterns] -w, --wordlist Outputs errors in wordlist format -q, --quiet Silences output @@ -16,6 +16,7 @@ --[no-]parallel Run in parallel or not, default --parallel -d, --dry-run List files to be checked -f, --suppress-file-rules Suppress all configured, default, and gitignore include and exclude patterns + --prune-wordlists Prune unused words from .spellr_wordlists/*.txt after checking. -c, --config FILENAME Path to the config file (default ./.spellr.yml) -v, --version Returns the current version @@ -26,6 +27,153 @@ end end + describe '--prune-wordlists' do + before do + with_temp_dir + english_wordlist + ruby_wordlist + end + + let(:english_wordlist) do + stub_fs_file('.spellr_wordlists/english.txt', <<~FILE) + entrya + entryb + entryc + entryd + entrye + FILE + end + + let(:ruby_wordlist) do + stub_fs_file('.spellr_wordlists/ruby.txt', <<~FILE) + entryc + entryd + FILE + end + + context 'with some unnecessary words' do + before do + stub_fs_file('checkable_file.rb', 'entrya entryc') + stub_fs_file('checkable_file.txt', 'entryb') + end + + it 'removes unnecessary words, most general file first.' do + spellr '--prune-wordlists' + + expect(stderr).to be_empty + expect(exitstatus).to be 0 + expect(stdout).to have_output <<~STDOUT + + 2 files checked + 0 errors found + + pruned: english.txt 3 words removed + pruned: ruby.txt 1 word removed + STDOUT + + expect(english_wordlist.read).to eq <<~FILE + entrya + entryb + FILE + + expect(ruby_wordlist.read).to eq <<~FILE + entryc + FILE + end + + describe '--quiet' do + it 'removes unnecessary words, most general file first, quietly' do + spellr '--prune-wordlists --quiet' do + expect(exitstatus).to eq 0 + expect(stderr).to be_empty + expect(stdout).to be_empty + end + + expect(english_wordlist.read).to eq <<~FILE + entrya + entryb + FILE + + expect(ruby_wordlist.read).to eq <<~FILE + entryc + FILE + end + end + + describe '--dry-run' do + it 'complains when --prune-wordlists then --dry-run' do + spellr('--prune-wordlists --dry-run') + + expect(exitstatus).to eq 1 + expect(stdout).to be_empty + expect(stderr).to have_output <<~STDERR + #{red('CLI error: --prune-wordlists is incompatible with --dry-run')} + STDERR + end + + it 'complains when --dry-run then --prune-wordlists' do + spellr('--dry-run --prune-wordlists') + + expect(exitstatus).to eq 1 + expect(stdout).to be_empty + expect(stderr).to have_output <<~STDERR + #{red('CLI error: --prune-wordlists is incompatible with --dry-run')} + STDERR + end + end + + describe 'ARGV' do + it 'complains when --prune-wordlists with file patterns' do + spellr("--prune-wordlists 'checkable_file.*'") + + expect(exitstatus).to eq 1 + expect(stdout).to be_empty + expect(stderr).to have_output <<~STDERR + #{red('CLI error: --prune-wordlists is incompatible with file patterns')} + STDERR + end + end + end + + context 'with some unrecognized words' do + before do + stub_fs_file('checkable_file.rb', 'entrya entryc') + stub_fs_file('checkable_file.txt', 'entryb entryf entryg') + end + + it 'runs spellr first as normal reporting errors' do + spellr '--prune-wordlists' + + expect(stderr).to be_empty + expect(exitstatus).to be 1 + + expect(stdout).to eq <<~STDOUT + #{aqua 'checkable_file.txt:1:7'} entryb #{red 'entryf'} entryg + #{aqua 'checkable_file.txt:1:14'} entryb entryf #{red 'entryg'} + + 2 files checked + 2 errors found + + to add or replace words interactively, run: + spellr --interactive checkable_file.txt + STDOUT + + expect(english_wordlist.read).to eq <<~FILE + entrya + entryb + entryc + entryd + entrye + FILE + + expect(ruby_wordlist.read).to eq <<~FILE + entryc + entryd + FILE + end + end + end + describe 'bin/generate' do before do with_temp_dir @@ -312,23 +460,23 @@ describe 'combining --parallel and --interactive' do it 'complains when --interactive then --parallel' do - spellr('--interactive --parallel') do - expect(exitstatus).to eq 1 - expect(stdout).to be_empty - expect(stderr).to have_output <<~STDERR - #{red('CLI error: --interactive is incompatible with --parallel')} - STDERR - end + spellr('--interactive --parallel') + + expect(exitstatus).to eq 1 + expect(stdout).to be_empty + expect(stderr).to have_output <<~STDERR + #{red('CLI error: --interactive is incompatible with --parallel')} + STDERR end it 'complains when --parallel then --interactive' do - spellr('--parallel --interactive') do - expect(exitstatus).to eq 1 - expect(stdout).to be_empty - expect(stderr).to have_output <<~STDERR - #{red('CLI error: --interactive is incompatible with --parallel')} - STDERR - end + spellr('--parallel --interactive') + + expect(exitstatus).to eq 1 + expect(stdout).to be_empty + expect(stderr).to have_output <<~STDERR + #{red('CLI error: --interactive is incompatible with --parallel')} + STDERR end end diff --git a/spec/prune_spec.rb b/spec/prune_spec.rb new file mode 100644 index 0000000..0f4e920 --- /dev/null +++ b/spec/prune_spec.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_relative '../lib/spellr/prune' + +RSpec.describe Spellr::Prune do + describe '.run' do + before do + stub_config(output: Spellr::OutputStubbed.new) + + with_temp_dir + english_wordlist + ruby_wordlist + end + + let(:english_wordlist) do + stub_fs_file('.spellr_wordlists/english.txt', <<~FILE) + entrya + entryb + entryc + entryd + entrye + FILE + end + + let(:ruby_wordlist) do + stub_fs_file('.spellr_wordlists/ruby.txt', <<~FILE) + entryc + entryd + FILE + end + + context 'with some unnecessary words' do + before do + stub_fs_file('checkable_file.rb', 'entrya entryc') + stub_fs_file('checkable_file.txt', 'entryb') + end + + it 'removes unnecessary words, most general file first.' do + described_class.run + + expect(english_wordlist.read).to eq <<~FILE + entrya + entryb + FILE + + expect(ruby_wordlist.read).to eq <<~FILE + entryc + FILE + end + end + + context 'with only unnecessary words' do + before do + stub_fs_file('checkable_file.rb', 'entrya') + stub_fs_file('checkable_file.txt', 'entryb') + end + + it 'removes unnecessary words, most general file first, deletes empty files' do + described_class.run + + expect(english_wordlist.read).to eq <<~FILE + entrya + entryb + FILE + + expect(ruby_wordlist).not_to exist + end + end + + context 'with only necessary words' do + before do + stub_fs_file('checkable_file.rb', 'entrya entryc entryd') + stub_fs_file('checkable_file.txt', 'entryb') + end + + it 'removes unnecessary words, most general file first.' do + described_class.run + + expect(english_wordlist.read).to eq <<~FILE + entrya + entryb + FILE + + expect(ruby_wordlist.read).to eq <<~FILE + entryc + entryd + FILE + end + end + end +end diff --git a/spec/spellr_spec.rb b/spec/spellr_spec.rb index e1ec674..ebe51b3 100644 --- a/spec/spellr_spec.rb +++ b/spec/spellr_spec.rb @@ -6,4 +6,24 @@ it 'has a version number' do expect(described_class::VERSION).not_to be nil end + + describe 'pwd' do + it 'is $PWD' do + expect(described_class.pwd.to_s).to eq ENV['PWD'] + end + + it 'is Pathname' do + expect(described_class.pwd).to be_a(Pathname) + end + end + + describe 'pwd_s' do + it 'is $PWD' do + expect(described_class.pwd_s).to eq ENV['PWD'] + end + + it 'is a String' do + expect(described_class.pwd_s.class).to be(String) + end + end end diff --git a/spec/wordlist_spec.rb b/spec/wordlist_spec.rb index e67a89e..a1aebcb 100644 --- a/spec/wordlist_spec.rb +++ b/spec/wordlist_spec.rb @@ -10,7 +10,13 @@ context 'when missing' do describe '#include?' do it "doesn't raise Errno::ENOENT" do - expect(subject).not_to include 'bar' + expect { subject.include?('bar') }.not_to raise_error + end + end + + describe '#delete' do + it "doesn't raise Errno::ENOENT" do + expect { subject.delete }.not_to raise_error end end end