From ecbc6ed1240ef0dc5d87eef527cc80468cdf156f Mon Sep 17 00:00:00 2001 From: Radu Gheorghe Date: Tue, 27 Sep 2022 12:49:02 +0300 Subject: [PATCH] Allow reindexing from remote cluster --- README.md | 5 +++-- build.gradle | 2 +- src/main/java/org/sematext/solr/reindexer/Input.java | 2 +- src/main/java/org/sematext/solr/reindexer/Output.java | 2 +- src/main/java/org/sematext/solr/reindexer/Runner.java | 11 ++++++++--- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bcf9f76..e8f8d60 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,8 @@ java -jar solr-reindexer.jar\ -sourceCollection my_collection_v1\ -targetCollection my_collection_v2\ -uniqueKey id\ - -zkAddress localhost:9983,localhost:2181\ + -sourceZkAddress localhost:9983,localhost:2181\ + -targetZkAddress zoo1:2181,zoo2:2181\ -skipFields _version_,text\ -retries 7\ -retryInterval 2000\ @@ -18,7 +19,7 @@ java -jar solr-reindexer.jar\ Only `sourceCollection` and `targetCollection` are mandatory. The rest are: - `uniqueKey`: we use a cursor to go over the data. The cursor requires to sort on the `uniqueKey` defined in the schema, which in turn defaults to `id` -- `zkAddress`: the Zookeeper host:port for SolrCloud. If there are more, comma-separate them +- `sourceZkAddress` and `targetZkAddress`: the Zookeeper host:port for SolrCloud (source and destination). If there are more, comma-separate them - `skipFields`: we reindex all the stored and docValues fields by default. But some may be skipped, like the default `_version_` (which will break the reindex because it will cause a version conflict) or copyFields that are also stored (they'll duplicate the values, because you'll redo the copyField operation). Comma-separate multiple fields - `retries` and `retryInterval`: if we encounter an exception, we wait for `retryInterval` millis and retry up to `retries` times - `query`: you may not want to reindex everything with the default `*:*` diff --git a/build.gradle b/build.gradle index d2620a7..8fc3a16 100644 --- a/build.gradle +++ b/build.gradle @@ -3,7 +3,7 @@ plugins { } group 'org.sematext' -version '1.0' +version '1.1' repositories { mavenCentral() diff --git a/src/main/java/org/sematext/solr/reindexer/Input.java b/src/main/java/org/sematext/solr/reindexer/Input.java index 6ded4be..0fbded7 100644 --- a/src/main/java/org/sematext/solr/reindexer/Input.java +++ b/src/main/java/org/sematext/solr/reindexer/Input.java @@ -24,7 +24,7 @@ public class Input { public Input(Context context) { final List zkServers = new ArrayList<>(); - String[] zkAddresses = context.stringParams.get("zkAddress").split(","); + String[] zkAddresses = context.stringParams.get("sourceZkAddress").split(","); for (String zkAddress: zkAddresses) { zkServers.add(zkAddress); } diff --git a/src/main/java/org/sematext/solr/reindexer/Output.java b/src/main/java/org/sematext/solr/reindexer/Output.java index 9788a93..000f71f 100644 --- a/src/main/java/org/sematext/solr/reindexer/Output.java +++ b/src/main/java/org/sematext/solr/reindexer/Output.java @@ -21,7 +21,7 @@ public class Output { public Output(Context context) { final List zkServers = new ArrayList<>(); - zkServers.add(context.stringParams.get("zkAddress")); + zkServers.add(context.stringParams.get("targetZkAddress")); client = new CloudSolrClient.Builder(zkServers, Optional.empty()) .build(); diff --git a/src/main/java/org/sematext/solr/reindexer/Runner.java b/src/main/java/org/sematext/solr/reindexer/Runner.java index d0b86b6..be61ff3 100644 --- a/src/main/java/org/sematext/solr/reindexer/Runner.java +++ b/src/main/java/org/sematext/solr/reindexer/Runner.java @@ -54,8 +54,12 @@ private static void parseCmdLine(String[] args) throws ParseException { .desc( "Interval between retries in milliseconds. Defaults to 5000" ) .hasArg(true) .build()); - options.addOption(Option.builder( "zkAddress") - .desc( "Zookeeper addresses for SolrCloud. Defaults to 'localhost:2181'. Comma-separate multiple addresses" ) + options.addOption(Option.builder( "sourceZkAddress") + .desc( "Zookeeper addresses for source SolrCloud. Defaults to 'localhost:2181'. Comma-separate multiple addresses" ) + .hasArg(true) + .build()); + options.addOption(Option.builder( "targetZkAddress") + .desc( "Zookeeper addresses for target SolrCloud. Defaults to 'localhost:2181'. Comma-separate multiple addresses" ) .hasArg(true) .build()); options.addOption(Option.builder( "query") @@ -93,7 +97,8 @@ private static void parseCmdLine(String[] args) throws ParseException { setIntegerParam("rows", 1000); setIntegerParam("retries", 10); setIntegerParam("retryInterval", 5000); - setStringParam("zkAddress", "localhost:2181"); + setStringParam("sourceZkAddress", "localhost:2181"); + setStringParam("targetZkAddress", "localhost:2181"); setStringParam("query", "*:*"); setStringParam("uniqueKey", "id"); setStringParam("sourceCollection", null);