Skip to content

Commit

Permalink
update url_parse based on package urlparse development (#874)
Browse files Browse the repository at this point in the history
* allow parse to handle all edge cases identified in urlparse https://github.com/DyfanJones/urlparse/blob/main/tests/testthat/test-url_parse.R

* simplify

* set host to lower case before signing

* revert

* ensure path isn't empty

* add base function bindings for mocking

* test rds_build_auth_token_v2 new method
  • Loading branch information
DyfanJones authored Jan 16, 2025
1 parent 50cd4f1 commit 858e5e2
Show file tree
Hide file tree
Showing 7 changed files with 209 additions and 38 deletions.
1 change: 1 addition & 0 deletions paws.common/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ Collate:
'handlers_stream.R'
'idempotency.R'
'jsonutil.R'
'mock_bindings.R'
'onLoad.R'
'paginate.R'
'populate.R'
Expand Down
3 changes: 2 additions & 1 deletion paws.common/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
* moved api log level to `debug` and `trace`. This is to prevent `info` level being saturated by api calls.
* migrate backend `httr` to `httr2`
* new `PawsStreamHandler`, allows paws to handle aws stream event (#842). Thankyou to @hadley for developing the initial solution in `httr2`.
* deprecated custom handler for `s3_unmarshal_select_object_content`
* deprecated custom handler for `s3_unmarshal_select_object_content` in favour or new streamhandler
* migrate `parse_url`, `parse_query_string` and `build_url` to `cpp` for performance improvement.
* ensure `url` is set to lower case before signature

# paws.common 0.7.7
* fix unix time expiration check
Expand Down
58 changes: 51 additions & 7 deletions paws.common/R/custom_rds.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
rds_build_auth_token <- function(endpoint, region, user, creds = NULL) {
rds_build_auth_token <- function(endpoint, user, creds = NULL, region=NULL) {
if (!startsWith(endpoint, "https://")) endpoint <- paste0("https://", endpoint)
req <- new_http_request("GET", endpoint)
auth_token_params <- list(
Expand All @@ -8,18 +8,62 @@ rds_build_auth_token <- function(endpoint, region, user, creds = NULL) {
req$url$raw_query <- update_query_string(req$url$raw_query, auth_token_params)

config <- get_config()
if (is.null(region)) {
region <- get_region(config$region)
if (!is.null(region)) {
config$region <- region
}
if (is.null(creds)) {
creds <- config$credentials$creds
} else {
creds <- populate(creds, tag_annotate(Creds()))
}
v4 <- Signer(credentials = Credentials(creds = creds))
req <- sign_with_body(v4, req, NULL, "rds-db", region, 15 * 60, TRUE, Sys.time())
v4 <- Signer(credentials =Credentials(creds = creds))
req <- sign_with_body(v4, req, NULL, "rds-db", region, 900, TRUE, Sys.time())

url <- build_url(req$url)
url <- gsub("^https?://", "", url)
return(url)
return(substr(url, 9, nchar(url)))
}

rds_build_auth_token_v2 <- function(DBHostname, Port, DBUsername, Region=NULL) {
op <- new_operation(
name = "connect", http_method = "GET",
http_path = "/", host_prefix = "", paginator = list(), stream_api = FALSE
)
input <- list(
Action = "connect",
DBUser = DBUsername
)
output <- list()

config <- get_config()
if (!is.null(Region)) {
config$region <- Region
}

if (!startsWith(DBHostname, "https://")) DBHostname <- paste0("https://", DBHostname)
config$endpoint <- paste(DBHostname, Port, sep = ":")

metadata <- list(
service_name = "rds",
endpoints = list(),
service_id = "RDS",
api_version = "2014-10-31",
signing_name = "rds-db",
json_version = "",
target_prefix = ""
)

handlers <- new_handlers("query", "v4")
svc <- new_service(metadata, handlers, config, op)

# create new request
request <- new_request(svc, op, input, output)
request$expire_time <- 900

# build request
request$http_request$url$raw_query <- build_query_string(request$params)

# sign request
request <- v4_sign_request_handler(request)
url <- build_url(request$http_request$url)
return(substr(url, 9, nchar(url)))
}
6 changes: 6 additions & 0 deletions paws.common/R/mock_bindings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
################################################################
# Base function binding for mocking:
# https://testthat.r-lib.org/reference/local_mocked_bindings.html#base-functions
################################################################

Sys.time <- NULL
13 changes: 7 additions & 6 deletions paws.common/R/signer_v4.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,12 +275,14 @@ build_context <- function(ctx, disable_header_hoisting) {
}
}
}

ctx <- build_canonical_headers(ctx, unsigned_headers, IGNORED_HEADERS)
ctx <- build_canonical_string(ctx)
# log_debug("Calculating signature using v4 auth.")
# log_debug("CanonicalRequest:\n%s", ctx$canonical_string)
ctx <- build_string_to_sign(ctx)
# log_debug("StringToSign:\n%s", ctx$string_to_sign)
ctx <- build_signature(ctx)

# log_debug("Signature:\n%s", ctx$signature)
if (ctx$is_presigned) {
query <- ctx$request$url$raw_query
ctx$request$url$raw_query <- update_query_string(query, list("X-Amz-Signature" = ctx$signature))
Expand Down Expand Up @@ -379,9 +381,9 @@ build_canonical_headers <- function(ctx, header, ignored_headers) {
for (key in headers) {
if (key == "host") {
if (ctx$request[["host"]] != "") {
header_value <- paste0("host:", ctx$request[["host"]])
header_value <- paste0("host:", tolower(ctx$request[["host"]]))
} else {
header_value <- paste0("host:", ctx$request$url[["host"]])
header_value <- paste0("host:", tolower(ctx$request$url[["host"]]))
}
} else {
value <- ctx$signed_header_vals[[key]]
Expand All @@ -395,7 +397,7 @@ build_canonical_headers <- function(ctx, header, ignored_headers) {

build_canonical_string <- function(ctx) {
if (!is.null(ctx$query)) {
ctx$request$url$raw_query <- gsub("\\+", "%20", build_query_string(ctx$query))
ctx$request$url$raw_query <- gsub("+", "%20", build_query_string(ctx$query), fixed = T)
}

uri <- get_uri_path(ctx$request$url)
Expand All @@ -405,7 +407,6 @@ build_canonical_string <- function(ctx) {
}

# TODO: Delete extra whitespace.

ctx$canonical_string <- paste(
ctx$request$method,
uri,
Expand Down
86 changes: 68 additions & 18 deletions paws.common/src/url_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ class URL
std::string user;
std::string password;
std::string host;
std::string port;
std::string raw_path;
std::string path;
std::string raw_query;
Expand All @@ -148,7 +149,7 @@ class URL
{
url << scheme << "://";
}
if (!user.empty())
if (!user.empty() || !password.empty())
{
url << user;
if (!password.empty())
Expand All @@ -158,8 +159,17 @@ class URL
url << "@";
}
url << host;
if (!port.empty())
{
url << ":" << port;
}
if (!raw_path.empty())
{
// Ensure the raw_path starts with a '/'
if (raw_path[0] != '/')
{
url << "/";
}
url << raw_path;
}
else
Expand Down Expand Up @@ -196,28 +206,68 @@ class URLParser
it = scheme_end + scheme_delim.size(); // Skip "://"
}

// Parse user and password (if present)
auto user_info_end = std::find(it, end, '@');
if (user_info_end != end)
// Parse host (including port if present)
auto host_end = std::find_if(it, end, [](char ch)
{ return ch == '/' || ch == '?' || ch == '#'; });
std::string host_port = std::string(it, host_end);

// Use rfind to locate the last occurrence of ':' in host_port
// Use find to locate '@' in host_port
auto colon_pos = host_port.rfind(':');
auto at_pos = host_port.find('@');

if (colon_pos != std::string::npos)
{
std::string user_info(it, user_info_end);
auto colon_pos = user_info.find(':');
if (colon_pos != std::string::npos)
if (at_pos != std::string::npos)
{
result.user = user_info.substr(0, colon_pos);
result.password = user_info.substr(colon_pos + 1);
if (colon_pos > at_pos)
{
// contains: <user>:<password>@<host>:<port>
result.host = host_port.substr(0, colon_pos);
result.port = host_port.substr(colon_pos + 1);

// split user, password and host
auto user_col_pos = result.host.find(':');
if (user_col_pos != std::string::npos)
{
result.user = result.host.substr(0, user_col_pos);
result.password = result.host.substr(user_col_pos + 1, at_pos - user_col_pos - 1);
result.host = result.host.substr(at_pos + 1);
}
else
{
// assume user when ":" can't be found
result.user = result.host.substr(0, at_pos);
result.host = result.host.substr(at_pos + 1);
}
}
else
{
// contains: <user>:<pass>@<host>
result.user = host_port.substr(0, colon_pos);
result.password = host_port.substr(colon_pos + 1, at_pos - colon_pos - 1);
result.host = host_port.substr(at_pos + 1);
}
}
else
{
result.user = user_info;
// contains: <host>:<port>
result.host = host_port.substr(0, colon_pos);
result.port = host_port.substr(colon_pos + 1);
}
it = user_info_end + 1; // Skip '@'
}
else if (at_pos != std::string::npos)
{
// contains: <user>@<host>
result.user = host_port.substr(0, at_pos);
result.host = host_port.substr(at_pos + 1);
}
else
{
// contains: <host>
result.host = host_port;
}

// Parse host (including port if present)
auto host_end = std::find_if(it, end, [](char ch)
{ return ch == '/' || ch == '?' || ch == '#'; });
result.host = std::string(it, host_end);
it = host_end;

// Parse path
Expand Down Expand Up @@ -265,7 +315,7 @@ class URLParser
Rcpp::List parse_url(const std::string &url)
{
URL parsed_url = URLParser::parse(url);
std::string raw_path = parsed_url.path.empty() ? "/" : parsed_url.path;
std::string raw_path = parsed_url.path;
std::string path = internal_url_unencode(raw_path);
std::string escaped_path = internal_url_encode(raw_path, "$&+,/;:=@");

Expand All @@ -281,8 +331,8 @@ Rcpp::List parse_url(const std::string &url)
Rcpp::Named("opaque") = "",
Rcpp::Named("user") = parsed_url.user,
Rcpp::Named("password") = parsed_url.password,
Rcpp::Named("host") = parsed_url.host,
Rcpp::Named("path") = path,
Rcpp::Named("host") = parsed_url.port.empty() ? parsed_url.host : parsed_url.host + ":" + parsed_url.port,
Rcpp::Named("path") = path.empty() ? "/" : path,
Rcpp::Named("raw_path") = raw_path,
Rcpp::Named("force_query") = false,
Rcpp::Named("raw_query") = raw_query,
Expand Down
80 changes: 74 additions & 6 deletions paws.common/tests/testthat/test_custom_rds.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,81 @@
test_that("rds_build_auth_token", {
actual <- rds_build_auth_token(
"prod-instance.us-east-1.rds.amazonaws.com:3306",
"us-west-2",
"mysqlUser",
test_that("check rds_build_auth_token", {
testthat::local_mocked_bindings(
Sys.time = function() as.POSIXct("2025/01/01 00:00:01 UTC"),
get_config = function() list(credentials = list(creds = list(
access_key_id = "AKIA",
secret_access_key = "SECRET",
session_token = "SESSION"
))),
.package = "paws.common"
)
expected <- "prod-instance.us-east-1.rds.amazonaws.com:3306/?Action=connect&DBUser=mysqlUser&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIA%2F20250101%2Fus-west-2%2Frds-db%2Faws4_request&X-Amz-Date=20250101T000001Z&X-Amz-Expires=900&X-Amz-Security-Token=SESSION&X-Amz-Signature=4e4ce10c7b3710decae757df60ba2519348dcb4db802f15646dce1bf5e17c3ed&X-Amz-SignedHeaders=host"
expected_signature <- "X-Amz-Signature=4e4ce10c7b3710decae757df60ba2519348dcb4db802f15646dce1bf5e17c3ed"

client <- list(
build_auth_token = paws.common:::rds_build_auth_token,
build_auth_token_v2 = paws.common:::rds_build_auth_token_v2
)
actual_v1 <- client$build_auth_token(
endpoint = "prod-instance.us-east-1.rds.amazonaws.com:3306",
region = "us-west-2",
user = "mysqlUser",
creds = list(
access_key_id = "AKIA",
secret_access_key = "SECRET",
session_token = "SESSION"
)
)
expect_match(actual, "^prod-instance\\.us-east-1\\.rds\\.amazonaws\\.com:3306/\\?Action=connect.*?DBUser=mysqlUser.*")

actual_v2 <- client$build_auth_token_v2(
DBHostname = "prod-instance.us-east-1.rds.amazonaws.com",
Port = 3306,
DBUsername = "mysqlUser",
Region = "us-west-2"
)
expect_equal(actual_v1, expected)
expect_equal(actual_v2, expected)
expect_match(actual_v1, expected_signature)
expect_match(actual_v2, expected_signature)
})

test_that("check rds_build_auth_token upper case host", {
local_mocked_bindings(
Sys.time = function() as.POSIXct("2025/01/01 00:00:01 UTC"),
get_config = function() list(credentials = list(creds = list(
access_key_id = "AKIA",
secret_access_key = "SECRET",
session_token = "SESSION"
))),
.package = "paws.common"
)

expected <- "XXXXX.US-EAST-2.RDS.AMAZONAWS.COM:3306/?Action=connect&DBUser=user1&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIA%2F20250101%2Fus-east-2%2Frds-db%2Faws4_request&X-Amz-Date=20250101T000001Z&X-Amz-Expires=900&X-Amz-Security-Token=SESSION&X-Amz-Signature=c57093ec8c5c9a668ceab511fe2d72df1794936673fec2418744f71496eed913&X-Amz-SignedHeaders=host"
expected_signature <- "X-Amz-Signature=c57093ec8c5c9a668ceab511fe2d72df1794936673fec2418744f71496eed913"

client <- list(
build_auth_token = paws.common:::rds_build_auth_token,
build_auth_token_v2 = paws.common:::rds_build_auth_token_v2
)

actual_v1 <- client$build_auth_token(
endpoint = "XXXXX.US-EAST-2.RDS.AMAZONAWS.COM:3306",
region = "us-east-2",
user = "user1",
creds = list(
access_key_id = "AKIA",
secret_access_key = "SECRET",
session_token = "SESSION"
)
)

actual_v2 <- client$build_auth_token_v2(
DBHostname='XXXXX.US-EAST-2.RDS.AMAZONAWS.COM',
Port = 3306,
DBUsername="user1",
Region="us-east-2"
)
expect_equal(actual_v1, expected)
expect_equal(actual_v2, expected)
expect_match(actual_v1, expected_signature)
expect_match(actual_v2, expected_signature)
})

0 comments on commit 858e5e2

Please sign in to comment.