Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add spellchecking feature #233

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pydocstyle = "*"
radon = "*"
xenon = "*"
snoop = "*"
symspellpy = "*"

[pipenv]
allow_prereleases = true
Expand Down
691 changes: 409 additions & 282 deletions Pipfile.lock

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions gator/checks/check_Spellcheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Check that writing in a report file is correctly spelled."""

import argparse

from gator import checkers
from gator import constants
from gator import fragments
from gator import invoke


def get_parser():
    """Get a parser for the arguments provided on the command-line.

    Returns:
        An argparse.ArgumentParser that requires ``--file`` and
        ``--directory`` and accepts an optional integer ``--ignore``
        (default 0).
    """
    parser = argparse.ArgumentParser(
        prog="Spellcheck",
        description="Check Provided by GatorGrader: Spell Checking",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Required Named Checker Argument(s).
    required_group = parser.add_argument_group("Required checker arguments")

    # (Required) FILE: source file.
    required_group.add_argument(
        "--file", type=str, help="File for checking", required=True
    )

    # (Required) DIRECTORY: path to file.
    # The help text keeps the usage output consistent with the other arguments.
    required_group.add_argument(
        "--directory",
        type=str,
        metavar="DIR",
        help="Directory with file for checking",
        required=True,
    )

    # Optional Arguments.
    optional_group = parser.add_argument_group("Optional check arguments")

    # (Not Required) Ignore Mistakes: ignore a specific amount of spelling mistakes.
    optional_group.add_argument(
        "--ignore",
        help="Ignore a certain amount of spelling mistakes",
        default=0,
        type=int,
        required=False,
    )
    return parser


def parse(args, parser=None):
    """Parse the provided arguments by delegating to checkers.parse.

    The module's get_parser factory is handed over together with the
    arguments and the optional, already-created parser.
    """
    parsed_arguments = checkers.parse(get_parser, args, parser)
    return parsed_arguments


# pylint: disable=unused-argument
def act(main_parsed_arguments, check_remaining_arguments):
    """Run the spellcheck for the parsed check arguments.

    The check reads the required ``file`` and ``directory`` arguments and the
    optional ``ignore`` count; ``main_parsed_arguments`` is accepted but not
    used. The outcome of the invocation is returned inside a one-element list.
    """
    arguments = parse(check_remaining_arguments)
    outcome = invoke.invoke_spellcheck(
        arguments.file, arguments.directory, arguments.ignore
    )
    return [outcome]
69 changes: 69 additions & 0 deletions gator/invoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from gator import markdown
from gator import report
from gator import repository
from gator import spelling
from gator import run
from gator import util

Expand Down Expand Up @@ -871,3 +872,71 @@ def invoke_all_command_count_checks(command, expected_count, exact=False):
command_output,
exact,
)


def invoke_spellcheck(file, file_directory, ignore):
    """Check to see if technical writing is spelled correctly.

    Args:
        file: name of the file to spell check.
        file_directory: directory containing the file; it is concatenated
            into the diagnostic, so it is expected to be a string.
        ignore: number of spelling mistakes to tolerate, forwarded unchanged
            to spelling.check.

    Returns:
        True when the check passed and False otherwise; the same value is
        reported through report_result together with a message and diagnostic.
    """
    directory_path = files.create_path(home=file_directory)

    # spelling.check returns the mistake count first and the pass flag second.
    mistake_count, did_check_pass = spelling.check(file, directory_path, ignore)

    # A failing flag paired with a count of zero means every mistake was
    # covered by the ignore allowance. Treat that as a pass so that a message
    # and diagnostic are always defined before reporting.
    did_check_pass = bool(did_check_pass) or mistake_count <= 0

    if did_check_pass:
        message = (
            "File "
            + file
            + " had "
            + str(mistake_count)
            + " spelling mistakes, well done!"
        )
        writing_quality = "correct"
    else:
        noun = "mistake" if mistake_count == 1 else "mistakes"
        message = (
            "File " + file + " had " + str(mistake_count) + " spelling " + noun + "!"
        )
        writing_quality = "incorrect"

    # The space before "directory" keeps the path from fusing with the word.
    diagnostic = (
        "File "
        + file
        + " contains "
        + writing_quality
        + " writing at "
        + file_directory
        + " directory"
    )

    # Report the results of the spellcheck.
    report_result(did_check_pass, message, diagnostic)

    # Output the results of the check as a boolean.
    return did_check_pass
141 changes: 141 additions & 0 deletions gator/spelling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Perform the spell checking processes to determine that technical writing in a report is correctly spelled."""

from symspellpy import SymSpell, Verbosity
import pkg_resources, re, markdown
from gator import fragments, files


# TODO: Finish the spellchecking function that can filter out contents contained inside of a code block.
# NOTE: Run this command to run and test the spellcheck feature.
# python gatorgrader.py Spellcheck --file input.md --directory /Users/jordanbyrne/Desktop --ignore 0
# Marker that opens and closes a fenced Markdown code block.
_CODE_FENCE = "```"
# An inline code segment: everything between a pair of backticks on one line.
_INLINE_CODE = re.compile(r"`[^`]*`")
# Symbols removed from a line before its words are extracted.
_PUNCTUATION = re.compile(r"[,!@\'~?*_~\.$%#]")
# A word is a run of ASCII letters.
_WORD = re.compile(r"[A-Za-z]+")


def _prose_words(lines):
    """Yield the words in Markdown lines, skipping fenced and inline code."""
    inside_fence = False
    for line in lines:
        # A fence line toggles the state and is itself never spell checked.
        if line.lstrip().startswith(_CODE_FENCE):
            inside_fence = not inside_fence
            continue
        if inside_fence:
            continue
        without_code = _INLINE_CODE.sub(" ", line)
        without_symbols = _PUNCTUATION.sub("", without_code)
        yield from _WORD.findall(without_symbols)


def check(input_file, file_directory, ignore):
    """Run the symspellpy tool on the prose of the input file.

    Fenced code blocks and inline code segments are skipped; every remaining
    word is looked up individually in the English frequency dictionary that
    ships with symspellpy. A word counts as misspelled when the lookup yields
    no suggestion or when the closest suggestion is not an exact match.

    Args:
        input_file: name of the file to check, passed to files.create_paths.
        file_directory: home directory passed to files.create_paths.
        ignore: number of misspelled words to tolerate; its absolute value is
            used, so a negative number never increases the count.

    Returns:
        A tuple ``(remaining_mistakes, passed)`` where ``remaining_mistakes``
        is the misspelled-word count after subtracting the allowance (never
        below zero) and ``passed`` is True exactly when that count is zero.
    """
    # TODO: links are not filtered yet, so the words of a URL are still checked.
    # Initialize the files and libraries to perform the spellchecking.
    spellcheck = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
    dictionary_path = pkg_resources.resource_filename(
        "symspellpy", "frequency_dictionary_en_82_765.txt"
    )
    # term_index is the column of the term and count_index is the column of
    # the term frequency.
    spellcheck.load_dictionary(dictionary_path, term_index=0, count_index=1)

    incorrect_spell_check_count = 0
    # Each matching file is processed separately so that an unclosed code
    # fence in one file cannot hide the prose of the next one.
    for file_for_checking in files.create_paths(file=input_file, home=file_directory):
        lines = file_for_checking.read_text().splitlines()
        for word in _prose_words(lines):
            # The dictionary terms are lowercase, so compare in lowercase.
            suggestions = spellcheck.lookup(word.lower(), Verbosity.CLOSEST)
            if not suggestions or suggestions[0].distance > 0:
                incorrect_spell_check_count += 1

    # Subtract the allowance without letting the count drop below zero.
    remaining_mistakes = max(incorrect_spell_check_count - abs(ignore), 0)

    # Send back the number of incorrectly spelled words and the file state.
    return remaining_mistakes, remaining_mistakes == 0
58 changes: 58 additions & 0 deletions tests/checks/test_Check_spellcheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""File to ensure the check spellcheck file is correct."""

import pytest

from gator import spelling
from gator.checks import check_spellcheck


def test_no_arguments_incorrect_system_exit(capsys):
    """Ensure that parsing an empty argument list exits with a usage message."""
    with pytest.raises(SystemExit):
        check_spellcheck.parse([])
    output = capsys.readouterr()
    # nothing is written to standard output
    assert output.out.count("\n") == 0
    # standard error carries the usage text, spanning three newlines
    assert "usage:" in output.err
    assert output.err.count("\n") == 3


@pytest.mark.parametrize(
    "commandline_arguments",
    [
        (["--file"]),
        (["--fileWRONG", "filename"]),
    ],
)
def test_required_commandline_arguments_cannot_parse(commandline_arguments, capsys):
    """Check that incomplete or misnamed required arguments are rejected.

    Each case omits at least one required argument, so parsing must exit and
    print a usage message on standard error without writing to standard output.
    """
    with pytest.raises(SystemExit):
        _ = check_spellcheck.parse(commandline_arguments)
    captured = capsys.readouterr()
    # there is no standard output
    assert captured.out.count("\n") == 0
    # the parser explains the problem on standard error
    assert "usage:" in captured.err


def test_required_commandline_arguments_can_parse():
    """Check that the required arguments are accepted and stored."""
    parsed_arguments = check_spellcheck.parse(
        ["--file", "report.md", "--directory", "writing"]
    )
    assert parsed_arguments.file == "report.md"
    assert parsed_arguments.directory == "writing"
    # the optional ignore count falls back to its default
    assert parsed_arguments.ignore == 0


def test_optional_commandline_arguments_cannot_parse():
    """Check that a non-integer ignore count is rejected by the parser."""
    with pytest.raises(SystemExit):
        _ = check_spellcheck.parse(
            ["--file", "report.md", "--directory", "writing", "--ignore", "many"]
        )


def test_optional_commandline_arguments_can_parse():
    """Check that an integer ignore count is accepted and stored as an int."""
    parsed_arguments = check_spellcheck.parse(
        ["--file", "report.md", "--directory", "writing", "--ignore", "2"]
    )
    assert parsed_arguments.ignore == 2


@pytest.mark.parametrize(
    "commandline_arguments",
    [
        (["--file", "report.md", "--directory", "writing"]),
        (["--file", "report.md", "--directory", "writing", "--ignore", "2"]),
    ],
)
def test_optional_commandline_arguments_can_parse_created_parser(
    commandline_arguments, not_raises
):
    """Check that valid arguments parse when a created parser is supplied."""
    # NOTE(review): not_raises is assumed to be a project fixture usable as a
    # context manager that fails the test if the given exception is raised;
    # confirm against the test suite's conftest.
    with not_raises(SystemExit):
        parser = check_spellcheck.get_parser()
        _ = check_spellcheck.parse(commandline_arguments, parser)


def test_act_produces_output():
    """Placeholder for checking that act produces output; nothing is asserted yet."""
    # TODO: call the check's act function and assert on what it reports.


def test_act_produces_output_complex_regex():
    """Placeholder for a second act output test; nothing is asserted yet."""
    # NOTE(review): the name mentions a regex, but the spellcheck parser shown
    # in this change takes no regex argument; confirm whether this test applies.
18 changes: 18 additions & 0 deletions tests/test_spelling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""File to check the correctness of the spelling.py file."""

import pytest

from gator import spelling


def test_garbage_word_detection():
    """Check that it will correctly mark garbage words as incorrect."""
    # TODO: the word list below is never passed to the spell checker and no
    # assertion is made, so this test currently always passes.
    test_list = ["Hello", "world"]


def test_spellcheck_using_invalid_inputs():
    """Placeholder for checking the spell checker's result on invalid inputs."""
    # TODO: no input is supplied and nothing is asserted yet.


def test_spellcheck_using_valid_inputs():
    """Placeholder for checking the spell checker's result on valid inputs."""
    # TODO: no input is supplied and nothing is asserted yet.