Merge pull request mrpowers-io#211 from jeffbrennan/fix-ruff-lint

Fix ruff lint
kunaljubce · Feb 24, 2024 · 6bb7fcd
2 parents 972c56a + f604084
commit 6bb7fcd
Show file tree

Hide file tree

Showing 6 changed files with 92 additions and 40 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -85,9 +85,16 @@ ignore = [
     "PLC1901", # Strange thing
     "UP007",   # Not supported in py3.6
     "UP038",   # Not supported in all py versions
+    "SIM108",  # Don't create long ternary operators
+    "PTH123",  # Don't force use of Pathlib
+    "PTH207",  # Don't force use of Pathlib
+    "PTH113",  # Don't force use of Pathlib
 ]
 extend-exclude = ["tests", "docs"]
 
 [tool.ruff.per-file-ignores]
 "quinn/extensions/column_ext.py" = ["FBT003", "N802"]
-"quinn/extensions/__init__.py" = ["F403"]
+"quinn/extensions/__init__.py" = ["F401", "F403"]
+"quinn/__init__.py" = ["F401", "F403"]
+"quinn/functions.py" = ["FBT003"]
+"quinn/keyword_finder.py" = ["A002"]
diff --git a/quinn/__init__.py b/quinn/__init__.py
@@ -20,24 +20,24 @@
 from quinn.functions import (
     anti_trim,
     approx_equal,
+    array_choice,
     business_days_between,
     exists,
     forall,
+    is_false,
+    is_falsy,
+    is_not_in,
+    is_null_or_blank,
+    is_true,
+    is_truthy,
     multi_equals,
+    null_between,
     remove_all_whitespace,
     remove_non_word_characters,
     single_space,
     uuid5,
     week_end_date,
     week_start_date,
-    is_falsy,
-    is_truthy,
-    is_false,
-    is_true,
-    is_null_or_blank,
-    is_not_in,
-    null_between,
-    array_choice,
 )
 from quinn.schema_helpers import print_schema_as_code
 from quinn.split_columns import split_col

diff --git a/quinn/dataframe_helpers.py b/quinn/dataframe_helpers.py
@@ -90,7 +90,7 @@ def print_athena_create_table(
     :param df: The pyspark.sql.DataFrame to use
     :param athena_table_name: The name of the athena table to generate
     :param s3location: The S3 location of the parquet data
-    :return: None
+    :return: None.
     """
     warnings.warn(
         "Function print_athena_create_table is deprecated and will be removed in the version 1.0",

diff --git a/quinn/functions.py b/quinn/functions.py
@@ -10,16 +10,13 @@
     from pyspark.sql.functions import udf
 
 
-import re
 import uuid
 from typing import Any
 
-from pyspark.sql.functions import lit, trim, when
 import pyspark.sql.functions as F  # noqa: N812
+from pyspark.sql.functions import lit, trim, when
 from pyspark.sql.types import (
-    ArrayType,
     BooleanType,
-    StringType,
 )
 
 
@@ -231,7 +228,8 @@ def array_choice(col: Column, seed: int | None = None) -> Column:
 
 
 def business_days_between(
-    start_date: Column, end_date: Column, # noqa: ARG001
+    start_date: Column,  # noqa: ARG001
+    end_date: Column,  # noqa: ARG001
 ) -> Column:
     """Function takes two Spark `Columns` and returns a `Column` with the number of business days between the start and the end date.
 
@@ -290,6 +288,7 @@ def uuid5(
         F.substring(hashed, 21, 12),
     )
 
+
 def is_falsy(col: Column) -> Column:
     """Returns a Column indicating whether all values in the Column are False or NULL (**falsy**).
 
@@ -377,4 +376,4 @@ def null_between(col: Column, lower: Column, upper: Column) -> Column:
                 ).otherwise(col.between(lower, upper)),
             ),
         ),
-    )
+    )
diff --git a/quinn/keyword_finder.py b/quinn/keyword_finder.py
@@ -1,18 +1,19 @@
+from __future__ import annotations
+
 import os
 from glob import iglob
 
-
 default_keywords = [
     "_jsc",
-    "_jconf", 
-    "_jvm", 
-    "_jsparkSession", 
-    "_jreader", 
-    "_jc", 
-    "_jseq", 
-    "_jdf", 
-    "_jmap", 
-    "_jco"
+    "_jconf",
+    "_jvm",
+    "_jsparkSession",
+    "_jreader",
+    "_jc",
+    "_jseq",
+    "_jdf",
+    "_jmap",
+    "_jco",
     "emptyRDD",
     "range",
     "init_batched_serializer",
@@ -40,38 +41,80 @@
 ]
 
 
-def search_file(path, keywords=default_keywords):
+def search_file(path: str, keywords: list[str] = default_keywords) -> None:
+    """Searches a file for keywords and prints the line number and line containing the keyword.
+
+    :param path: The path to the file to search.
+    :type path: str
+    :param keywords: The list of keywords to search for.
+    :type keywords: list[str]
+    :returns: None
+    :rtype: None
+
+    """
     print(f"\nSearching: {path}")
     with open(path) as f:
         for line_number, line in enumerate(f, 1):
-            for keyword in keywords:    
+            for keyword in keywords:
                 if keyword in line:
-                    print(f"{line_number}: {keyword_format(line)}", end='')
+                    print(f"{line_number}: {keyword_format(line)}", end="")
                     break
 
 
-def search_files(path, keywords=default_keywords):
+def search_files(path: str, keywords: list[str] = default_keywords) -> None:
+    """Searches all files in a directory for keywords.
+
+    :param path: The path to the directory to search.
+    :type path: str
+    :param keywords: The list of keywords to search for.
+    :type keywords: list[str]
+    :returns: None
+    :rtype: None
+
+    """
     rootdir_glob = f"{path}/**/*"
     file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]
     for f in file_list:
-        search_file(f)
+        search_file(f, keywords)
+
+
+def keyword_format(input: str, keywords: list[str] = default_keywords) -> str:
+    """Formats the input string to highlight the keywords.
 
+    :param input: The string to format.
+    :type input: str
+    :param keywords: The list of keywords to highlight.
+    :type keywords: list[str]
 
-def keyword_format(input, keywords=default_keywords):
-    nc = '\033[0m'
-    red = '\033[31m'
-    bold = '\033[1m'
+    """
+    nc = "\033[0m"
+    red = "\033[31m"
+    bold = "\033[1m"
     res = input
     for keyword in keywords:
-        res = surround_substring(res, keyword, red+bold, nc)
+        res = surround_substring(res, keyword, red + bold, nc)
     return res
 
 
-def surround_substring(input, substring, surround_start, surround_end):
+def surround_substring(input: str, substring: str, surround_start: str, surround_end: str) -> str:
+    """Surrounds a substring with the given start and end strings.
+
+    :param input: The string to search.
+    :type input: str
+    :param substring: The substring to surround.
+    :type substring: str
+    :param surround_start: The string to start the surrounding with.
+    :type surround_start: str
+    :param surround_end: The string to end the surrounding with.
+    :type surround_end: str
+    :returns: The input string with the substring surrounded.
+    :rtype: str
+
+    """
     index = input.find(substring)
     res = ""
     if index == -1:
         res = input
     else:
-        res = input[:index] + surround_start + substring + surround_end + input[(index+len(substring)):]
+        res = input[:index] + surround_start + substring + surround_end + input[(index + len(substring)) :]
     return res
diff --git a/tests/test_keyword_finder.py b/tests/test_keyword_finder.py
@@ -1,17 +1,20 @@
-import pytest
 from quinn.keyword_finder import search_file, search_files, keyword_format, surround_substring
 
+
 def test_search_file():
     search_file("tests/test_files/some_pyspark.py")
 
+
 def test_search_files():
     search_files("tests/test_files")
 
+
 def test_keyword_format():
     print(keyword_format("spark rdd stuff"))
     print(keyword_format("spark rdd stuff with bad _jvm"))
     print(keyword_format("nice string"))
     print(keyword_format(""))
 
+
 def test_surround_substring():
-    print(surround_substring("spark rdd stuff", "rdd", "**", "||"))
+    print(surround_substring("spark rdd stuff", "rdd", "**", "||"))