Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add ip_prefix function [4/n] #11514

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions velox/docs/functions/presto/ipaddress.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
===================
IP Functions
===================

.. function:: ip_prefix(ip_address, prefix_bits) -> ipprefix

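Returns the IP prefix of a given ``ip_address`` with subnet size of ``prefix_bits``.
``ip_address`` can be either of type ``VARCHAR`` or type ``IPADDRESS``.
``prefix_bits`` must be in the range [0, 32] for IPv4 addresses and [0, 128] for
IPv6 addresses; values outside that range raise an error. ::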

SELECT ip_prefix(CAST('192.168.255.255' AS IPADDRESS), 9); -- {192.128.0.0/9}
SELECT ip_prefix('2001:0db8:85a3:0001:0001:8a2e:0370:7334', 48); -- {2001:db8:85a3::/48}

3 changes: 2 additions & 1 deletion velox/exec/fuzzer/FuzzerUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "velox/dwio/catalog/fbhive/FileUtils.h"
#include "velox/dwio/dwrf/writer/Writer.h"
#include "velox/expression/SignatureBinder.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"

using namespace facebook::velox::dwio::catalog::fbhive;

Expand Down Expand Up @@ -283,7 +284,7 @@ bool usesTypeName(
// If 'type' is a RowType or contains RowTypes with empty field names, adds
// default names to these fields in the RowTypes.
TypePtr sanitize(const TypePtr& type) {
if (!type) {
if (!type || isIPPrefixType(type)) {
return type;
}

Expand Down
1 change: 0 additions & 1 deletion velox/expression/fuzzer/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,6 @@ bool ExpressionFuzzer::isSupportedSignature(
if (usesTypeName(signature, "opaque") ||
usesTypeName(signature, "timestamp with time zone") ||
usesTypeName(signature, "interval day to second") ||
usesTypeName(signature, "ipprefix") ||
(!options_.enableDecimalType && usesTypeName(signature, "decimal")) ||
(!options_.enableComplexTypes && useComplexType) ||
(options_.enableComplexTypes && usesTypeName(signature, "unknown"))) {
Expand Down
59 changes: 59 additions & 0 deletions velox/functions/prestosql/IPAddressFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,73 @@
*/
#pragma once

#include "velox/functions/Macros.h"
#include "velox/functions/Registerer.h"
#include "velox/functions/prestosql/types/IPAddressType.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"

namespace facebook::velox::functions {

template <typename T>
struct IPPrefixFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

// Computes the IP prefix for an IPADDRESS argument. The bytes of 'ip' are
// copied in reverse order into a folly byte array, which is then wrapped in a
// folly::IPAddressV6 and handed to makeIPPrefix together with 'prefixBits'.
FOLLY_ALWAYS_INLINE void call(
    out_type<IPPrefix>& result,
    const arg_type<IPAddress>& ip,
    const arg_type<int64_t>& prefixBits) {
  folly::ByteArray16 reversedBytes;
  // Copy-and-reverse in one pass; yields the same bytes as a memcpy followed
  // by an in-place std::reverse.
  const auto* firstByte = reinterpret_cast<const unsigned char*>(&ip);
  std::reverse_copy(
      firstByte,
      firstByte + ipaddress::kIPAddressBytes,
      reversedBytes.begin());

  const folly::IPAddressV6 v6Addr(reversedBytes);
  result = makeIPPrefix(v6Addr, prefixBits);
}

// Computes the IP prefix for a VARCHAR argument. The string is parsed with
// folly::IPAddress::tryFromString; a parse failure is reported as a user
// error. A successfully parsed address is converted to its IPv6 form and
// passed to makeIPPrefix together with 'prefixBits'.
FOLLY_ALWAYS_INLINE void call(
    out_type<IPPrefix>& result,
    const arg_type<Varchar>& ipString,
    const arg_type<int64_t>& prefixBits) {
  const auto parsedAddress = folly::IPAddress::tryFromString(ipString);
  if (parsedAddress.hasError()) {
    VELOX_USER_FAIL("Cannot cast value to IPADDRESS: {}", ipString);
  }

  const auto v6Addr = folly::IPAddress::createIPv6(parsedAddress.value());
  result = makeIPPrefix(v6Addr, prefixBits);
}
private:
// Builds the (address, prefix length) tuple returned by ip_prefix.
//
// 'v6Addr' is the input address in IPv6 form; 'prefixBits' is the requested
// subnet size. Raises a user error when 'prefixBits' is outside
// [0, ipaddress::kIPV4Bits] for an IPv4-mapped address, or outside
// [0, ipaddress::kIPV6Bits] otherwise. On success the address is masked to
// 'prefixBits', its bytes are reversed and copied into an int128_t, and the
// result is returned together with 'prefixBits' narrowed to int8_t.
static std::tuple<int128_t, int8_t> makeIPPrefix(
folly::IPAddressV6 v6Addr,
int64_t prefixBits) {
// The allowed range depends on whether the address is IPv4-mapped.
if (v6Addr.isIPv4Mapped()) {
VELOX_USER_CHECK(
0 <= prefixBits && prefixBits <= ipaddress::kIPV4Bits,
"IPv4 subnet size must be in range [0, 32]");
} else {
VELOX_USER_CHECK(
0 <= prefixBits && prefixBits <= ipaddress::kIPV6Bits,
"IPv6 subnet size must be in range [0, 128]");
}
// IPv4-mapped addresses are masked as IPv4 and then converted back to IPv6;
// all other addresses are masked directly as IPv6.
auto canonicalBytes = v6Addr.isIPv4Mapped()
? v6Addr.createIPv4().mask(prefixBits).createIPv6().toByteArray()
: v6Addr.mask(prefixBits).toByteArray();

int128_t intAddr;
// Reverse the byte order before copying into the int128_t.
// NOTE(review): presumably this converts folly's byte order into the
// in-memory layout used for the int128_t representation - confirm.
std::reverse(canonicalBytes.begin(), canonicalBytes.end());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see we call std::reverse twice.
While it is fine for the initial implementation and probably not having a major performance impact, but is there a way not to std::reverse at all?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't think of a good way because we store the IPAddress as int128_t instead of varbinary. So we need to pay 2 reverses, one to get the ip, and then another to get back the int128_t.

Since it is only 16 bytes, it shouldn't be that expensive

memcpy(&intAddr, &canonicalBytes, ipaddress::kIPAddressBytes);
// The range checks above guarantee prefixBits is within [0, 128] here.
return std::make_tuple(intAddr, static_cast<int8_t>(prefixBits));
}
};

// Registers the IPADDRESS and IPPREFIX types and both overloads of ip_prefix
// (IPADDRESS and VARCHAR first argument) under the name 'prefix' + "ip_prefix".
//
// Declared 'inline' because this definition lives in a header: without it,
// every translation unit that includes the header emits its own external
// definition, which violates the one-definition rule and fails at link time
// once the header is included from more than one source file.
inline void registerIPAddressFunctions(const std::string& prefix) {
  registerIPAddressType();
  registerIPPrefixType();
  registerFunction<IPPrefixFunction, IPPrefix, IPAddress, int64_t>(
      {prefix + "ip_prefix"});
  registerFunction<IPPrefixFunction, IPPrefix, Varchar, int64_t>(
      {prefix + "ip_prefix"});
}

} // namespace facebook::velox::functions
114 changes: 114 additions & 0 deletions velox/functions/prestosql/tests/IPAddressFunctionsTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/common/base/tests/GTestUtils.h"
#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"

namespace facebook::velox::functions::prestosql {
class IPAddressFunctionsTest : public functions::test::FunctionBaseTest {
protected:
// Evaluates ip_prefix on 'input' after casting it to IPADDRESS, using 'mask'
// as the prefix length, and returns the result cast to VARCHAR.
std::optional<std::string> ipPrefixFunctionFromIpAddress(
    const std::optional<std::string>& input,
    const std::optional<int64_t>& mask) {
  const std::string expression =
      "cast(ip_prefix(cast(c0 as ipaddress), c1) as varchar)";
  return evaluateOnce<std::string>(expression, input, mask);
}

// Evaluates ip_prefix directly on the VARCHAR 'input', using 'mask' as the
// prefix length, and returns the result cast to VARCHAR.
std::optional<std::string> ipPrefixFromVarChar(
    const std::optional<std::string>& input,
    const std::optional<int64_t>& mask) {
  const std::string expression = "cast(ip_prefix(c0, c1) as varchar)";
  return evaluateOnce<std::string>(expression, input, mask);
}
Comment on lines +23 to +35
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are ipPrefixFunctionFromIpAddress and ipPrefixFromVarChar mixed up and should be the other way around?

};

// Verifies ip_prefix on IPADDRESS inputs: masking of IPv4, IPv4-mapped and
// IPv6 addresses at various prefix lengths, followed by rejection of prefix
// lengths outside the valid range.
TEST_F(IPAddressFunctionsTest, ipPrefixFromIpAddress) {
  // 'expected' is the resulting prefix string for valid cases and the error
  // message for failing cases.
  struct TestCase {
    const char* address;
    int64_t prefixBits;
    const char* expected;
  };

  const TestCase validCases[] = {
      {"1.2.3.4", 24, "1.2.3.0/24"},
      {"1.2.3.4", 32, "1.2.3.4/32"},
      {"1.2.3.4", 0, "0.0.0.0/0"},
      {"::ffff:1.2.3.4", 24, "1.2.3.0/24"},
      {"64:ff9b::17", 64, "64:ff9b::/64"},
      {"64:ff9b::17", 127, "64:ff9b::16/127"},
      {"64:ff9b::17", 128, "64:ff9b::17/128"},
      {"64:ff9b::17", 0, "::/0"},
      {"2001:0db8:85a3:0001:0001:8a2e:0370:7334", 48, "2001:db8:85a3::/48"},
      {"2001:0db8:85a3:0001:0001:8a2e:0370:7334", 52, "2001:db8:85a3::/52"},
      {"2001:0db8:85a3:0001:0001:8a2e:0370:7334",
       128,
       "2001:db8:85a3:1:1:8a2e:370:7334/128"},
      {"2001:0db8:85a3:0001:0001:8a2e:0370:7334", 0, "::/0"},
  };
  for (const auto& testCase : validCases) {
    SCOPED_TRACE(testCase.address);
    ASSERT_EQ(
        ipPrefixFunctionFromIpAddress(testCase.address, testCase.prefixBits),
        testCase.expected);
  }

  const TestCase invalidCases[] = {
      {"::ffff:1.2.3.4", -1, "IPv4 subnet size must be in range [0, 32]"},
      {"::ffff:1.2.3.4", 33, "IPv4 subnet size must be in range [0, 32]"},
      {"64:ff9b::10", -1, "IPv6 subnet size must be in range [0, 128]"},
      {"64:ff9b::10", 129, "IPv6 subnet size must be in range [0, 128]"},
  };
  for (const auto& testCase : invalidCases) {
    SCOPED_TRACE(testCase.address);
    VELOX_ASSERT_THROW(
        ipPrefixFunctionFromIpAddress(testCase.address, testCase.prefixBits),
        testCase.expected);
  }
}

// Verifies ip_prefix on VARCHAR inputs: masking of IPv4, IPv4-mapped and IPv6
// addresses, rejection of out-of-range prefix lengths, and rejection of
// strings that are not valid IP addresses.
TEST_F(IPAddressFunctionsTest, ipPrefixFromVarChar) {
  // 'expected' is the resulting prefix string for valid cases and the error
  // message for failing cases.
  struct TestCase {
    const char* address;
    int64_t prefixBits;
    const char* expected;
  };

  const TestCase validCases[] = {
      {"1.2.3.4", 24, "1.2.3.0/24"},
      {"1.2.3.4", 32, "1.2.3.4/32"},
      {"1.2.3.4", 0, "0.0.0.0/0"},
      {"::ffff:1.2.3.4", 24, "1.2.3.0/24"},
      {"64:ff9b::17", 64, "64:ff9b::/64"},
      {"64:ff9b::17", 127, "64:ff9b::16/127"},
      {"64:ff9b::17", 128, "64:ff9b::17/128"},
      {"64:ff9b::17", 0, "::/0"},
  };
  for (const auto& testCase : validCases) {
    SCOPED_TRACE(testCase.address);
    ASSERT_EQ(
        ipPrefixFromVarChar(testCase.address, testCase.prefixBits),
        testCase.expected);
  }

  const TestCase invalidCases[] = {
      {"::ffff:1.2.3.4", -1, "IPv4 subnet size must be in range [0, 32]"},
      {"::ffff:1.2.3.4", 33, "IPv4 subnet size must be in range [0, 32]"},
      {"64:ff9b::10", -1, "IPv6 subnet size must be in range [0, 128]"},
      {"64:ff9b::10", 129, "IPv6 subnet size must be in range [0, 128]"},
      {"localhost", 24, "Cannot cast value to IPADDRESS: localhost"},
      {"64::ff9b::10", 24, "Cannot cast value to IPADDRESS: 64::ff9b::10"},
      {"64:face:book::10",
       24,
       "Cannot cast value to IPADDRESS: 64:face:book::10"},
      {"123.456.789.012",
       24,
       "Cannot cast value to IPADDRESS: 123.456.789.012"},
  };
  for (const auto& testCase : invalidCases) {
    SCOPED_TRACE(testCase.address);
    VELOX_ASSERT_THROW(
        ipPrefixFromVarChar(testCase.address, testCase.prefixBits),
        testCase.expected);
  }
}

} // namespace facebook::velox::functions::prestosql
Loading