Skip to content

Commit

Permalink
Merge pull request mrpowers-io#211 from jeffbrennan/fix-ruff-lint
Browse files Browse the repository at this point in the history
Fix ruff lint
  • Loading branch information
SemyonSinchenko authored Feb 24, 2024
2 parents 972c56a + f604084 commit 6bb7fcd
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 40 deletions.
9 changes: 8 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,16 @@ ignore = [
"PLC1901", # Strange thing
"UP007", # Not supported in py3.6
"UP038", # Not supported in all py versions
"SIM108", # Don't create long ternary operators
"PTH123", # Don't force use of Pathlib
"PTH207", # Don't force use of Pathlib
"PTH113", # Don't force use of Pathlib
]
extend-exclude = ["tests", "docs"]

[tool.ruff.per-file-ignores]
"quinn/extensions/column_ext.py" = ["FBT003", "N802"]
"quinn/extensions/__init__.py" = ["F403"]
"quinn/extensions/__init__.py" = ["F401", "F403"]
"quinn/__init__.py" = ["F401", "F403"]
"quinn/functions.py" = ["FBT003"]
"quinn/keyword_finder.py" = ["A002"]
16 changes: 8 additions & 8 deletions quinn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,24 @@
from quinn.functions import (
anti_trim,
approx_equal,
array_choice,
business_days_between,
exists,
forall,
is_false,
is_falsy,
is_not_in,
is_null_or_blank,
is_true,
is_truthy,
multi_equals,
null_between,
remove_all_whitespace,
remove_non_word_characters,
single_space,
uuid5,
week_end_date,
week_start_date,
is_falsy,
is_truthy,
is_false,
is_true,
is_null_or_blank,
is_not_in,
null_between,
array_choice,
)
from quinn.schema_helpers import print_schema_as_code
from quinn.split_columns import split_col
Expand Down
2 changes: 1 addition & 1 deletion quinn/dataframe_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def print_athena_create_table(
:param df: The pyspark.sql.DataFrame to use
:param athena_table_name: The name of the athena table to generate
:param s3location: The S3 location of the parquet data
:return: None
:return: None.
"""
warnings.warn(
"Function print_athena_create_table is deprecated and will be removed in the version 1.0",
Expand Down
11 changes: 5 additions & 6 deletions quinn/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,13 @@
from pyspark.sql.functions import udf


import re
import uuid
from typing import Any

from pyspark.sql.functions import lit, trim, when
import pyspark.sql.functions as F # noqa: N812
from pyspark.sql.functions import lit, trim, when
from pyspark.sql.types import (
ArrayType,
BooleanType,
StringType,
)


Expand Down Expand Up @@ -231,7 +228,8 @@ def array_choice(col: Column, seed: int | None = None) -> Column:


def business_days_between(
start_date: Column, end_date: Column, # noqa: ARG001
start_date: Column, # noqa: ARG001
end_date: Column, # noqa: ARG001
) -> Column:
"""Function takes two Spark `Columns` and returns a `Column` with the number of business days between the start and the end date.
Expand Down Expand Up @@ -290,6 +288,7 @@ def uuid5(
F.substring(hashed, 21, 12),
)


def is_falsy(col: Column) -> Column:
"""Returns a Column indicating whether all values in the Column are False or NULL (**falsy**).
Expand Down Expand Up @@ -377,4 +376,4 @@ def null_between(col: Column, lower: Column, upper: Column) -> Column:
).otherwise(col.between(lower, upper)),
),
),
)
)
87 changes: 65 additions & 22 deletions quinn/keyword_finder.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
from __future__ import annotations

import os
from glob import iglob


default_keywords = [
"_jsc",
"_jconf",
"_jvm",
"_jsparkSession",
"_jreader",
"_jc",
"_jseq",
"_jdf",
"_jmap",
"_jco"
"_jconf",
"_jvm",
"_jsparkSession",
"_jreader",
"_jc",
"_jseq",
"_jdf",
"_jmap",
"_jco",
"emptyRDD",
"range",
"init_batched_serializer",
Expand Down Expand Up @@ -40,38 +41,80 @@
]


def search_file(path: str, keywords: list[str] = default_keywords) -> None:
    """Search a file for keywords and print every matching line.

    A header naming the file is printed first. Each line that contains at
    least one keyword is then printed exactly once, prefixed with its 1-based
    line number, with the keywords highlighted by ``keyword_format``.

    :param path: The path to the file to search.
    :type path: str
    :param keywords: The list of keywords to search for.
    :type keywords: list[str]
    :returns: None
    :rtype: None
    """
    print(f"\nSearching: {path}")
    # errors="replace" keeps the scan going when the file holds bytes that are
    # not valid in the default text encoding (e.g. a binary file picked up by
    # a recursive directory search) instead of raising UnicodeDecodeError.
    with open(path, errors="replace") as f:
        for line_number, line in enumerate(f, 1):
            if any(keyword in line for keyword in keywords):
                # Highlight with the same keywords that were searched for.
                # The line keeps its own trailing newline, hence end="".
                print(f"{line_number}: {keyword_format(line, keywords)}", end="")


def search_files(path: str, keywords: list[str] = default_keywords) -> None:
    """Search all files below a directory for keywords.

    The directory is walked recursively with the glob pattern ``<path>/**/*``;
    every match that is a regular file is passed to ``search_file`` together
    with ``keywords``. Directories matched by the pattern are skipped.

    :param path: The path to the directory to search.
    :type path: str
    :param keywords: The list of keywords to search for.
    :type keywords: list[str]
    :returns: None
    :rtype: None
    """
    rootdir_glob = f"{path}/**/*"
    # Iterate the glob lazily; there is no need to build the full file list first.
    for candidate in iglob(rootdir_glob, recursive=True):
        if os.path.isfile(candidate):
            search_file(candidate, keywords)


def keyword_format(input: str, keywords: list[str] = default_keywords) -> str:
    """Format the input string to highlight the keywords.

    Each keyword found in the string is wrapped in ANSI escape codes: red and
    bold before it, a reset after it. Keywords are processed one after another
    in list order, and only the first occurrence of each keyword is wrapped
    because ``surround_substring`` stops at the first match.

    :param input: The string to format.
    :type input: str
    :param keywords: The list of keywords to highlight.
    :type keywords: list[str]
    :returns: The input string with the keywords highlighted.
    :rtype: str
    """
    nc = "\033[0m"  # reset ("no colour")
    red = "\033[31m"
    bold = "\033[1m"
    res = input
    for keyword in keywords:
        res = surround_substring(res, keyword, red + bold, nc)
    return res


def surround_substring(input: str, substring: str, surround_start: str, surround_end: str) -> str:
    """Surround the first occurrence of a substring with the given start and end strings.

    Only the first occurrence is wrapped. When ``substring`` does not occur in
    ``input`` (or is empty), ``input`` is returned unchanged.

    :param input: The string to search.
    :type input: str
    :param substring: The substring to surround.
    :type substring: str
    :param surround_start: The string to start the surrounding with.
    :type surround_start: str
    :param surround_end: The string to end the surrounding with.
    :type surround_end: str
    :returns: The input string with the substring surrounded.
    :rtype: str
    """
    # An empty substring "matches" at index 0 and would only inject the bare
    # markers at the start of the string; treat it as "nothing to surround".
    if not substring:
        return input
    index = input.find(substring)
    if index == -1:
        return input
    end = index + len(substring)
    return input[:index] + surround_start + substring + surround_end + input[end:]
7 changes: 5 additions & 2 deletions tests/test_keyword_finder.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import pytest
from quinn.keyword_finder import search_file, search_files, keyword_format, surround_substring


def test_search_file():
    """search_file announces the file it is scanning on stdout."""
    import io
    from contextlib import redirect_stdout

    buffer = io.StringIO()
    with redirect_stdout(buffer):
        search_file("tests/test_files/some_pyspark.py")
    # The header line is printed before any matches, whatever the file contains.
    assert "Searching: tests/test_files/some_pyspark.py" in buffer.getvalue()


def test_search_files():
    """search_files visits the files found below the given directory."""
    import io
    from contextlib import redirect_stdout

    buffer = io.StringIO()
    with redirect_stdout(buffer):
        search_files("tests/test_files")
    # some_pyspark.py lives in tests/test_files, so its header must be printed.
    assert "some_pyspark.py" in buffer.getvalue()


def test_keyword_format():
    """keyword_format wraps keywords in red+bold ANSI codes and leaves other text alone."""
    highlighted_jvm = "\033[31m\033[1m_jvm\033[0m"
    # An explicit keyword list keeps the expected output independent of the
    # contents of the default keyword list.
    assert keyword_format("spark rdd stuff with bad _jvm", ["_jvm"]) == f"spark rdd stuff with bad {highlighted_jvm}"
    assert keyword_format("nice string", ["_jvm"]) == "nice string"
    assert keyword_format("", ["_jvm"]) == ""
    # "_jvm" is one of the default keywords, so the default call must highlight it.
    assert "\033[31m" in keyword_format("spark rdd stuff with bad _jvm")
    # Smoke check with the default keywords: must return a string without raising.
    assert isinstance(keyword_format("spark rdd stuff"), str)


def test_surround_substring():
    """surround_substring wraps the first match and leaves non-matching input unchanged."""
    assert surround_substring("spark rdd stuff", "rdd", "**", "||") == "spark **rdd|| stuff"
    assert surround_substring("spark stuff", "rdd", "**", "||") == "spark stuff"

0 comments on commit 6bb7fcd

Please sign in to comment.