Skip to content

IoTConsensusV2: Fix some consensus group missing due to recover failed or blocked #15359

IoTConsensusV2: Fix some consensus group missing due to recover failed or blocked

IoTConsensusV2: Fix some consensus group missing due to recover failed or blocked #15359

# Workflow-level settings: display name, triggers, run de-duplication and the
# environment shared by every job.
name: Multi-Cluster IT

on:
  push:
    branches: ["master", "rel/1.*", "rc/1.*", "force_ci/**"]
    # Pushes that only touch these paths do not trigger the workflow.
    paths-ignore:
      - "docs/**"
      - "site/**"
      - "iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/**"  # query engine
  pull_request:
    branches: ["master", "rel/1.*", "rc/1.*", "force_ci/**"]
    # Same path filter as for pushes.
    paths-ignore:
      - "docs/**"
      - "site/**"
      - "iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/**"  # query engine
  # Allow the workflow to be started manually.
  workflow_dispatch:

# One concurrency group per workflow and ref; a newer run cancels the run that
# is still in progress for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  # Folded scalar: the lines below are joined with single spaces into one value.
  MAVEN_OPTS: >-
    -Dhttp.keepAlive=false
    -Dmaven.wagon.http.pool=false
    -Dmaven.wagon.http.retryHandler.class=standard
    -Dmaven.wagon.http.retryHandler.count=3
  MAVEN_ARGS: --batch-mode --no-transfer-progress
  # Read from the repository secret GE_ACCESS_TOKEN.
  DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}

jobs:
# Runs the MultiClusterIT2AutoCreateSchema Maven profile once per cluster mode
# in the matrix; the same mode is passed for both entries of
# -DClusterConfigurations.
auto-create-schema:
  runs-on: ${{ matrix.os }}
  strategy:
    # Let every matrix combination finish even when another one fails.
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
      cluster:
        - LightWeightStandaloneMode
        - ScalableSingleNodeMode
        - HighPerformanceMode
        - PipeConsensusBatchMode
        - PipeConsensusStreamMode
      os: [ubuntu-latest]
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    # $RANDOM is a bash feature, so the shell is pinned explicitly instead of
    # relying on the runner's default. Integer division means the delay is a
    # whole number of seconds in the range 0..10.
    # NOTE(review): presumably this staggers the parallel matrix jobs - confirm.
    - name: Sleep for a random whole number of seconds between 0 and 10
      shell: bash
      run: |
        sleep $(( (RANDOM % 10000 + 1) / 1000 ))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol can be skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        mvn clean verify \
          -P with-integration-tests \
          -DskipUTs \
          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
          -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
          -pl integration-test \
          -am -PMultiClusterIT2AutoCreateSchema \
          -ntp
    # Cluster logs are uploaded only when an earlier step failed.
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-auto-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
        path: integration-test/target/cluster-logs
        retention-days: 30
# Runs the MultiClusterIT2ManualCreateSchema Maven profile for selected
# (cluster1, cluster2) pairs, passed as -DClusterConfigurations=cluster1,cluster2.
manual-create-schema:
  runs-on: ${{ matrix.os }}
  strategy:
    # Let every matrix combination finish even when another one fails.
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
      cluster1:
        - LightWeightStandaloneMode
        - ScalableSingleNodeMode
        - HighPerformanceMode
        - PipeConsensusBatchMode
        - PipeConsensusStreamMode
      cluster2:
        - LightWeightStandaloneMode
        - ScalableSingleNodeMode
        - HighPerformanceMode
      os: [ubuntu-latest]
      # Ten of the fifteen cluster1 x cluster2 pairs are removed. The pairs that
      # remain are:
      #   LightWeightStandaloneMode -> HighPerformanceMode
      #   ScalableSingleNodeMode    -> ScalableSingleNodeMode
      #   HighPerformanceMode       -> ScalableSingleNodeMode
      #   PipeConsensusBatchMode    -> ScalableSingleNodeMode
      #   PipeConsensusStreamMode   -> ScalableSingleNodeMode
      exclude:
        - {cluster1: LightWeightStandaloneMode, cluster2: LightWeightStandaloneMode}
        - {cluster1: LightWeightStandaloneMode, cluster2: ScalableSingleNodeMode}
        - {cluster1: ScalableSingleNodeMode, cluster2: LightWeightStandaloneMode}
        - {cluster1: ScalableSingleNodeMode, cluster2: HighPerformanceMode}
        - {cluster1: HighPerformanceMode, cluster2: LightWeightStandaloneMode}
        - {cluster1: HighPerformanceMode, cluster2: HighPerformanceMode}
        - {cluster1: PipeConsensusBatchMode, cluster2: LightWeightStandaloneMode}
        - {cluster1: PipeConsensusBatchMode, cluster2: HighPerformanceMode}
        - {cluster1: PipeConsensusStreamMode, cluster2: LightWeightStandaloneMode}
        - {cluster1: PipeConsensusStreamMode, cluster2: HighPerformanceMode}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    # $RANDOM is a bash feature, so the shell is pinned explicitly instead of
    # relying on the runner's default. Integer division means the delay is a
    # whole number of seconds in the range 0..10.
    # NOTE(review): presumably this staggers the parallel matrix jobs - confirm.
    - name: Sleep for a random whole number of seconds between 0 and 10
      shell: bash
      run: |
        sleep $(( (RANDOM % 10000 + 1) / 1000 ))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol can be skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        mvn clean verify \
          -P with-integration-tests \
          -DskipUTs \
          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
          -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
          -pl integration-test \
          -am -PMultiClusterIT2ManualCreateSchema \
          -ntp
    # Cluster logs are uploaded only when an earlier step failed.
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-manual-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
# Runs the MultiClusterIT2SubscriptionArchVerification Maven profile with
# ScalableSingleNodeMode passed for both entries of -DClusterConfigurations.
subscription-arch-verification:
  runs-on: ${{ matrix.os }}
  strategy:
    # Let every matrix combination finish even when another one fails.
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
      cluster1: [ScalableSingleNodeMode]
      cluster2: [ScalableSingleNodeMode]
      os: [ubuntu-latest]
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    # $RANDOM is a bash feature, so the shell is pinned explicitly instead of
    # relying on the runner's default. Integer division means the delay is a
    # whole number of seconds in the range 0..10.
    # NOTE(review): presumably this staggers jobs that start together - confirm.
    - name: Sleep for a random whole number of seconds between 0 and 10
      shell: bash
      run: |
        sleep $(( (RANDOM % 10000 + 1) / 1000 ))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol can be skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        mvn clean verify \
          -P with-integration-tests \
          -DskipUTs \
          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
          -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
          -pl integration-test \
          -am -PMultiClusterIT2SubscriptionArchVerification \
          -ntp
    # Cluster logs are uploaded only when an earlier step failed.
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-subscription-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
# Runs the MultiClusterIT2SubscriptionRegressionConsumer Maven profile with
# ScalableSingleNodeMode passed for both entries of -DClusterConfigurations.
subscription-regression-consumer:
  runs-on: ${{ matrix.os }}
  strategy:
    # Let every matrix combination finish even when another one fails.
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # Do not use HighPerformanceMode here, otherwise some tests will cause
      # the GH runner to receive a shutdown signal.
      cluster1: [ScalableSingleNodeMode]
      cluster2: [ScalableSingleNodeMode]
      os: [ubuntu-latest]
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    # $RANDOM is a bash feature, so the shell is pinned explicitly instead of
    # relying on the runner's default. Integer division means the delay is a
    # whole number of seconds in the range 0..10.
    # NOTE(review): presumably this staggers jobs that start together - confirm.
    - name: Sleep for a random whole number of seconds between 0 and 10
      shell: bash
      run: |
        sleep $(( (RANDOM % 10000 + 1) / 1000 ))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol can be skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        mvn clean verify \
          -P with-integration-tests \
          -DskipUTs \
          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
          -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
          -pl integration-test \
          -am -PMultiClusterIT2SubscriptionRegressionConsumer \
          -ntp
    # Cluster logs are uploaded only when an earlier step failed.
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-subscription-regression-consumer-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
# Runs the MultiClusterIT2SubscriptionRegressionMisc Maven profile with
# ScalableSingleNodeMode passed for both entries of -DClusterConfigurations.
subscription-regression-misc:
  runs-on: ${{ matrix.os }}
  strategy:
    # Let every matrix combination finish even when another one fails.
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # Do not use HighPerformanceMode here, otherwise some tests will cause
      # the GH runner to receive a shutdown signal.
      cluster1: [ScalableSingleNodeMode]
      cluster2: [ScalableSingleNodeMode]
      os: [ubuntu-latest]
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    # $RANDOM is a bash feature, so the shell is pinned explicitly instead of
    # relying on the runner's default. Integer division means the delay is a
    # whole number of seconds in the range 0..10.
    # NOTE(review): presumably this staggers jobs that start together - confirm.
    - name: Sleep for a random whole number of seconds between 0 and 10
      shell: bash
      run: |
        sleep $(( (RANDOM % 10000 + 1) / 1000 ))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol can be skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        mvn clean verify \
          -P with-integration-tests \
          -DskipUTs \
          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
          -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
          -pl integration-test \
          -am -PMultiClusterIT2SubscriptionRegressionMisc \
          -ntp
    # Cluster logs are uploaded only when an earlier step failed.
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-subscription-regression-misc-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
# Runs the MultiClusterIT2TableModel Maven profile once per cluster mode in the
# matrix; the same mode is passed for both entries of -DClusterConfigurations.
table-model:
  runs-on: ${{ matrix.os }}
  strategy:
    # Let every matrix combination finish even when another one fails.
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
      cluster:
        - LightWeightStandaloneMode
        - ScalableSingleNodeMode
        - HighPerformanceMode
        - PipeConsensusBatchMode
        - PipeConsensusStreamMode
      os: [ubuntu-latest]
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    # $RANDOM is a bash feature, so the shell is pinned explicitly instead of
    # relying on the runner's default. Integer division means the delay is a
    # whole number of seconds in the range 0..10.
    # NOTE(review): presumably this staggers the parallel matrix jobs - confirm.
    - name: Sleep for a random whole number of seconds between 0 and 10
      shell: bash
      run: |
        sleep $(( (RANDOM % 10000 + 1) / 1000 ))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol can be skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        mvn clean verify \
          -P with-integration-tests \
          -DskipUTs \
          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
          -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
          -pl integration-test \
          -am -PMultiClusterIT2TableModel \
          -ntp
    # Cluster logs are uploaded only when an earlier step failed.
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-table-model-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
        path: integration-test/target/cluster-logs
        retention-days: 30