From c436b0fc0da73ebce0f2402173cdbf959544d627 Mon Sep 17 00:00:00 2001 From: zhihanz Date: Fri, 6 Sep 2024 14:05:48 +0800 Subject: [PATCH] chore: add official document around auto discovery --- .../databend/jdbc/ConnectionProperties.java | 7 ++ .../com/databend/jdbc/DatabendDriverUri.java | 5 +- .../java/com/databend/jdbc/DatabendNodes.java | 9 +- docs/Connection.md | 83 ++++++++++++++----- .../deploy/config/databend-query-node-1.toml | 2 +- .../deploy/config/databend-query-node-2.toml | 2 + .../deploy/config/databend-query-node-3.toml | 2 + 7 files changed, 84 insertions(+), 26 deletions(-) diff --git a/databend-jdbc/src/main/java/com/databend/jdbc/ConnectionProperties.java b/databend-jdbc/src/main/java/com/databend/jdbc/ConnectionProperties.java index 4be05383..ab870909 100644 --- a/databend-jdbc/src/main/java/com/databend/jdbc/ConnectionProperties.java +++ b/databend-jdbc/src/main/java/com/databend/jdbc/ConnectionProperties.java @@ -24,6 +24,7 @@ public final class ConnectionProperties { public static final ConnectionProperty MAX_FAILOVER_RETRY = new MaxFailoverRetry(); public static final ConnectionProperty LOAD_BALANCING_POLICY = new LoadBalancingPolicy(); public static final ConnectionProperty AUTO_DISCOVERY = new AutoDiscovery(); + public static final ConnectionProperty NODE_DISCOVERY_INTERVAL = new NodeDiscoveryInterval(); public static final ConnectionProperty ENABLE_MOCK = new EnableMock(); public static final ConnectionProperty DATABASE = new Database(); public static final ConnectionProperty ACCESS_TOKEN = new AccessToken(); @@ -162,6 +163,12 @@ public AutoDiscovery() { } } + private static class NodeDiscoveryInterval extends AbstractConnectionProperty { + public NodeDiscoveryInterval() { + super("node_discovery_interval", Optional.of("300000"), NOT_REQUIRED, ALLOWED, INTEGER_CONVERTER); + } + } + private static class EnableMock extends AbstractConnectionProperty { public EnableMock() { super("enable_mock", Optional.of("false"), NOT_REQUIRED, ALLOWED, 
BOOLEAN_CONVERTER); diff --git a/databend-jdbc/src/main/java/com/databend/jdbc/DatabendDriverUri.java b/databend-jdbc/src/main/java/com/databend/jdbc/DatabendDriverUri.java index 8e740262..25f69d5a 100644 --- a/databend-jdbc/src/main/java/com/databend/jdbc/DatabendDriverUri.java +++ b/databend-jdbc/src/main/java/com/databend/jdbc/DatabendDriverUri.java @@ -63,6 +63,7 @@ public final class DatabendDriverUri { private final Integer waitTimeSecs; private final Integer maxRowsInBuffer; private final Integer maxRowsPerPage; + private final int nodeDiscoveryInterval; // private final boolean useSecureConnection; @@ -81,12 +82,14 @@ private DatabendDriverUri(String url, Properties driverProperties) this.tenant = TENANT.getValue(properties).orElse(""); this.maxFailoverRetry = MAX_FAILOVER_RETRY.getValue(properties).orElse(0); this.autoDiscovery = AUTO_DISCOVERY.getValue(properties).orElse(false); + this.nodeDiscoveryInterval = NODE_DISCOVERY_INTERVAL.getValue(properties).orElse(5 * 60 * 1000); List finalUris = canonicalizeUris(uris, this.useSecureConnection, this.sslmode); DatabendClientLoadBalancingPolicy policy = DatabendClientLoadBalancingPolicy.create(LOAD_BALANCING_POLICY.getValue(properties).orElse(DatabendClientLoadBalancingPolicy.DISABLED)); DatabendNodes nodes = uriAndProperties.getKey(); nodes.updateNodes(finalUris); nodes.updatePolicy(policy); nodes.setSSL(this.useSecureConnection, this.sslmode); + nodes.setDiscoveryInterval(this.nodeDiscoveryInterval); this.nodes = nodes; this.database = DATABASE.getValue(properties).orElse("default"); this.presignedUrlDisabled = PRESIGNED_URL_DISABLED.getRequiredValue(properties); @@ -278,7 +281,7 @@ private static Map.Entry> parse(String url) uris.addAll(uriSet); // Create DatabendNodes object DatabendClientLoadBalancingPolicy policy = DatabendClientLoadBalancingPolicy.create(DatabendClientLoadBalancingPolicy.DISABLED); // You might want to make this configurable - DatabendNodes databendNodes = new DatabendNodes(uris, policy, 
uriPath, uriQuery, uriFragment); + DatabendNodes databendNodes = new DatabendNodes(uris, policy, uriPath, uriQuery, uriFragment, 5 * 60 * 1000); return new AbstractMap.SimpleImmutableEntry<>(databendNodes, uriProperties); } catch (URISyntaxException e) { throw new SQLException("Invalid URI: " + raw, e); diff --git a/databend-jdbc/src/main/java/com/databend/jdbc/DatabendNodes.java b/databend-jdbc/src/main/java/com/databend/jdbc/DatabendNodes.java index aba3a498..86757251 100644 --- a/databend-jdbc/src/main/java/com/databend/jdbc/DatabendNodes.java +++ b/databend-jdbc/src/main/java/com/databend/jdbc/DatabendNodes.java @@ -26,7 +26,7 @@ public class DatabendNodes implements DatabendNodeRouter { @Setter private boolean debug = false; // minimum time between discovery - protected long discoveryInterval = 1000 * 60 * 5; + protected long discoveryInterval; protected DatabendClientLoadBalancingPolicy policy; private final String uriPath; @@ -36,13 +36,14 @@ public class DatabendNodes implements DatabendNodeRouter { private boolean useSecureConnection = false; private String sslmode = "disable"; - public DatabendNodes(List queryNodesUris, DatabendClientLoadBalancingPolicy policy, String UriPath, String UriQuery, String UriFragment) { + public DatabendNodes(List queryNodesUris, DatabendClientLoadBalancingPolicy policy, String UriPath, String UriQuery, String UriFragment, long discoveryInterval) { this.query_nodes_uris = new AtomicReference<>(queryNodesUris); this.policy = policy; this.index = new AtomicInteger(0); this.uriPath = UriPath; this.uriQuery = UriQuery; this.uriFragment = UriFragment; + this.discoveryInterval = discoveryInterval; } @Override @@ -55,6 +56,10 @@ public void setSSL(boolean useSecureConnection, String sslmode) { this.sslmode = sslmode; } + public void setDiscoveryInterval(long discoveryInterval) { + this.discoveryInterval = discoveryInterval; + } + public void updateNodes(List query_nodes_uris) { this.query_nodes_uris.set(query_nodes_uris); } diff 
--git a/docs/Connection.md b/docs/Connection.md index 140808cf..6ad56574 100644 --- a/docs/Connection.md +++ b/docs/Connection.md @@ -82,11 +82,48 @@ There are three load balancing options available: **NOTICE:** -When configuring SSL, it's recommended to use the approach shown in the last example, which allows for more detailed SSL configuration including certificate verification. +1. When configuring SSL, it's recommended to use the approach shown in the last example, which allows for more detailed SSL configuration including certificate verification. -Remember to replace the hostnames, ports, and file paths with your actual Databend cluster configuration and SSL certificate locations. +2. Remember to replace the hostnames, ports, and file paths with your actual Databend cluster configuration and SSL certificate locations. + +3. Failover retry occurs only for connection issues (java.net.ConnectException); other exceptions will NOT trigger a retry. +4. Databend-jdbc supports transactions. During a transaction, the connection will be pinned to the same node, and the load balancing policy will be disabled. Once the transaction is committed or aborted, the connection will be released and the load balancing policy will be enabled again. 
+ + + + +#### Automatic Node Discovery + +| Parameter | Description | Default | example | +|------------------------|---------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------------------| +| auto_discovery | Automatically discover possible cluster nodes in a databend query cluster | false | jdbc:databend://0.0.0.0:8000/default?auto_discovery=true | +| node_discovery_interval | Minimum interval between two automatic node discovery actions in milliseconds | 5 * 60 * 1000 | jdbc:databend://0.0.0.0:8000/default?auto_discovery=true&node_discovery_interval=600000 | + +Automatic Node Discovery tries to discover the nodes of an existing databend query cluster using the /v1/discovery_nodes api. It is turned off if the target api is not supported by your databend version (minimum version: v1.2.629-nightly). It passively probes the possible node list when a new query occurs after the given `node_discovery_interval` has elapsed, and updates the possible node list used for load balancing and failover. It does not use a thread pool or executor service to start a background thread to handle the task. + +**NOTICE:** +As the cluster ip/dns may vary based on your network environment, it is recommended to give all possible nodes in the same warehouse and tenant a fixed ip or dns for reliable node discovery. +Sample Configuration: + +```toml +[query] +discovery_address = "localhost:8000" + +# Databend Query HTTP Handler. +http_handler_host = "0.0.0.0" +http_handler_port = 8000 + +tenant_id = "test_tenant" +cluster_id = "test_cluster" +``` + + +In the above node configuration file, `discovery_address` is the address jdbc uses to connect to the target node when that node is discovered through the node discovery api of other nodes located in the same warehouse (with the same tenant_id and cluster_id). +The address returned for a node is determined based on three scenarios: +1. 
If the user has directly modified the discovery-address in the configuration, this value is returned. +2. If the user has configured an HTTP address that is not 0.0.0.0 or 127.0.0.1, this HTTP address is returned. +3. If the user has configured an HTTP address as 0.0.0.0 or 127.0.0.1, the system will probe to detect a suitable IP address. The IP address that is successfully routed through the network and can communicate with the meta service will be returned. -Failover retry occur only for connection issues (java.net.ConnectException), other exception will NOT trigger retry ## Connection parameters @@ -109,22 +146,24 @@ String url="jdbc:databend://databend:secret@0.0.0.0:8000/hello_databend"; ### Parameter References -| Parameter | Description | Default | example | -|------------------------|---------------------------------------------------------------------------------------------------------------------------|----------|-------------------------------------------------------------------------| -| user | Databend user name | none | jdbc:databend://0.0.0.0:8000/hello_databend?user=test | -| password | Databend user password | none | jdbc:databend://0.0.0.0:8000/hello_databend?password=secret | -| SSL | Enable SSL | false | jdbc:databend://0.0.0.0:8000/hello_databend?SSL=true | -| sslmode | SSL mode | disable | jdbc:databend://0.0.0.0:8000/hello_databend?sslmode=enable | -| copy_purge | If True, the command will purge the files in the stage after they are loaded successfully into the table | false | jdbc:databend://0.0.0.0:8000/hello_databend?copy_purge=true | -| presigned_url_disabled | whether use presigned url to upload data, generally if you use local disk as your storage layer, it should be set as true | false | jdbc:databend://0.0.0.0:8000/hello_databend?presigned_url_disabled=true | -| wait_time_secs | Restful query api blocking time, if the query is not finished, the api will block for wait_time_secs seconds | 10 | 
jdbc:databend://0.0.0.0:8000/hello_databend?wait_time_secs=10 | -| max_rows_in_buffer | the maximum rows in server session buffer | 5000000 | jdbc:databend://0.0.0.0:8000/hello_databend?max_rows_in_buffer=5000000 | -| max_rows_per_page | the maximum rows per page in response data body | 100000 | jdbc:databend://0.0.0.0:8000/default?max_rows_per_page=100000 | -| connection_timeout | okhttp connection_timeout param | 0 | jdbc:databend://0.0.0.0:8000/default?connection_timeout=100000 | -| query_timeout | time that you wait a SQL execution | 90 | jdbc:databend://0.0.0.0:8000/default?query_timeout=120 | -| null_display | null value display | \N | jdbc:databend://0.0.0.0:8000/hello_databend?null_display=null | -| binary_format | binary format, support hex and base64 | hex | jdbc:databend://0.0.0.0:8000/default?binary_format=hex | -| use_verify | whether verify the server before establishing the connection | true | jdbc:databend://0.0.0.0:8000/default?use_verify=true | -| debug | whether enable debug mode | false | jdbc:databend://0.0.0.0:8000/default?debug=true | -| load_balancing_policy | Specifies the load balancing policy for multi-host connections. Options are "disabled", "random", and "round_robin". | disabled | jdbc:databend://localhost:8000,localhost:8002,localhost:8003/default?load_balancing_policy=random | -| max_failover_retry | Specifies the maximum number of retry attempts for failover connections. 
| 0 | jdbc:databend://localhost:7222,localhost:7223,localhost:7224,localhost:8000/default?max_failover_retry=4 | +| Parameter | Description | Default | example | +|------------------------|---------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------------------------| +| user | Databend user name | none | jdbc:databend://0.0.0.0:8000/hello_databend?user=test | +| password | Databend user password | none | jdbc:databend://0.0.0.0:8000/hello_databend?password=secret | +| SSL | Enable SSL | false | jdbc:databend://0.0.0.0:8000/hello_databend?SSL=true | +| sslmode | SSL mode | disable | jdbc:databend://0.0.0.0:8000/hello_databend?sslmode=enable | +| copy_purge | If True, the command will purge the files in the stage after they are loaded successfully into the table | false | jdbc:databend://0.0.0.0:8000/hello_databend?copy_purge=true | +| presigned_url_disabled | whether use presigned url to upload data, generally if you use local disk as your storage layer, it should be set as true | false | jdbc:databend://0.0.0.0:8000/hello_databend?presigned_url_disabled=true | +| wait_time_secs | Restful query api blocking time, if the query is not finished, the api will block for wait_time_secs seconds | 10 | jdbc:databend://0.0.0.0:8000/hello_databend?wait_time_secs=10 | +| max_rows_in_buffer | the maximum rows in server session buffer | 5000000 | jdbc:databend://0.0.0.0:8000/hello_databend?max_rows_in_buffer=5000000 | +| max_rows_per_page | the maximum rows per page in response data body | 100000 | jdbc:databend://0.0.0.0:8000/default?max_rows_per_page=100000 | +| connection_timeout | okhttp connection_timeout param | 0 | jdbc:databend://0.0.0.0:8000/default?connection_timeout=100000 | +| query_timeout | time that you wait a SQL execution | 90 | jdbc:databend://0.0.0.0:8000/default?query_timeout=120 | +| null_display | null value display | \N | 
jdbc:databend://0.0.0.0:8000/hello_databend?null_display=null | +| binary_format | binary format, support hex and base64 | hex | jdbc:databend://0.0.0.0:8000/default?binary_format=hex | +| use_verify | whether verify the server before establishing the connection | true | jdbc:databend://0.0.0.0:8000/default?use_verify=true | +| debug | whether enable debug mode | false | jdbc:databend://0.0.0.0:8000/default?debug=true | +| load_balancing_policy | Specifies the load balancing policy for multi-host connections. Options are "disabled", "random", and "round_robin". | disabled | jdbc:databend://localhost:8000,localhost:8002,localhost:8003/default?load_balancing_policy=random | +| max_failover_retry | Specifies the maximum number of retry attempts for failover connections. | 0 | jdbc:databend://localhost:7222,localhost:7223,localhost:7224,localhost:8000/default?max_failover_retry=4 | +| auto_discovery | Automatically discover possible cluster nodes in a databend query cluster | false | jdbc:databend://0.0.0.0:8000/default?auto_discovery=true | +| node_discovery_interval | Minimum interval between two automatic node discovery actions in milliseconds | 5 * 60 * 1000 | jdbc:databend://0.0.0.0:8000/default?node_discovery_interval=600000 | diff --git a/scripts/deploy/config/databend-query-node-1.toml b/scripts/deploy/config/databend-query-node-1.toml index b874b6da..9fd27cdf 100644 --- a/scripts/deploy/config/databend-query-node-1.toml +++ b/scripts/deploy/config/databend-query-node-1.toml @@ -14,7 +14,7 @@ admin_api_address = "0.0.0.0:8080" # Databend Query metrics RESET API. metric_api_address = "0.0.0.0:7070" - +discovery_address = "localhost:8000" # Databend Query MySQL Handler. 
mysql_handler_host = "0.0.0.0" mysql_handler_port = 3307 diff --git a/scripts/deploy/config/databend-query-node-2.toml b/scripts/deploy/config/databend-query-node-2.toml index b3e26140..87308112 100644 --- a/scripts/deploy/config/databend-query-node-2.toml +++ b/scripts/deploy/config/databend-query-node-2.toml @@ -15,6 +15,8 @@ admin_api_address = "0.0.0.0:8082" # Databend Query metrics RESET API. metric_api_address = "0.0.0.0:7072" +discovery_address = "localhost:8002" + # Databend Query MySQL Handler. mysql_handler_host = "0.0.0.0" mysql_handler_port = 3308 diff --git a/scripts/deploy/config/databend-query-node-3.toml b/scripts/deploy/config/databend-query-node-3.toml index 105145dd..2d29f3ea 100644 --- a/scripts/deploy/config/databend-query-node-3.toml +++ b/scripts/deploy/config/databend-query-node-3.toml @@ -15,6 +15,8 @@ admin_api_address = "0.0.0.0:8083" # Databend Query metrics RESET API. metric_api_address = "0.0.0.0:7073" +discovery_address = "localhost:8003" + # Databend Query MySQL Handler. mysql_handler_host = "0.0.0.0" mysql_handler_port = 3309