Skip to content

Commit

Permalink
Add a small feature to compute and report text similarity when a CTE …
Browse files Browse the repository at this point in the history
…is overwritten
  • Loading branch information
jarodmeng committed Feb 14, 2025
1 parent d5aaba6 commit 3013b53
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 3 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ Imports:
lifecycle,
lubridate,
progress,
vctrs
vctrs,
stringdist
Suggests:
testthat,
hms,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ importFrom(methods,new)
importFrom(progress,progress_bar)
importFrom(rlang,"!!!")
importFrom(rlang,":=")
importFrom(stringdist,stringdist)
importMethodsFrom(DBI,dbAppendTable)
importMethodsFrom(DBI,dbClearResult)
importMethodsFrom(DBI,dbConnect)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

* Replaced deprecated `with_mock()` usage in unit testing with
`with_mocked_bindings()`. (#292)
* Added a feature to report text similarity when a CTE is overwritten. (#294)

# RPresto 1.4.7

Expand Down
17 changes: 15 additions & 2 deletions R/PrestoSession.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' @slot .ctes List of common table expressions (CTEs), i.e. SELECT statements
#' with names. They can be used in a WITH statement.
#' @keywords internal
#' @importFrom stringdist stringdist
PrestoSession <- setRefClass("PrestoSession",
fields = c(
".parameters",
Expand Down Expand Up @@ -67,8 +68,20 @@ PrestoSession <- setRefClass("PrestoSession",
}
if (hasCTE(name)) {
if (identical(replace, TRUE)) {
.ctes[[match(name, getCTENames())]] <<- sql
message("CTE ", name, " is replaced.")
cte_idx <- match(name, getCTENames())
old_sql <- .ctes[[cte_idx]]
old_sql_no_ws <- gsub("\\s", "", old_sql)
sql_no_ws <- gsub("\\s", "", sql)
.ctes[[cte_idx]] <<- sql
lcs_dist <- stringdist::stringdist(
old_sql_no_ws, sql_no_ws, method = "lcs"
)
max_length <- max(nchar(old_sql_no_ws), nchar(sql_no_ws))
percentage_similarity <- (1 - (lcs_dist / max_length)) * 100
message(
"CTE ", name, " is replaced (",
sprintf("%.0f%%", percentage_similarity), " similarity)."
)
} else {
stop(
"CTE ", name, " already exists and repalce is set to FALSE.",
Expand Down

0 comments on commit 3013b53

Please sign in to comment.