From 41e25029b908e2a16019b0bf2824677d0af30374 Mon Sep 17 00:00:00 2001 From: Steve Yurong Su Date: Sun, 26 Jan 2025 10:08:28 +0800 Subject: [PATCH] Pipe IT: Fix for release --- .github/workflows/pipe-it-2cluster.yml | 547 ++++++++++++++++++ .../it/tablemodel/IoTDBPipeProtocolIT.java | 16 +- 2 files changed, 548 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/pipe-it-2cluster.yml diff --git a/.github/workflows/pipe-it-2cluster.yml b/.github/workflows/pipe-it-2cluster.yml new file mode 100644 index 000000000000..4940c926ab91 --- /dev/null +++ b/.github/workflows/pipe-it-2cluster.yml @@ -0,0 +1,547 @@ +name: Multi-Cluster IT + +on: + push: + branches: + - master + - 'rel/1.*' + - 'rc/1.*' + - 'force_ci/**' + paths-ignore: + - 'docs/**' + - 'site/**' + - 'iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/**' #queryengine + pull_request: + branches: + - master + - 'rel/1.*' + - 'rc/1.*' + - 'force_ci/**' + paths-ignore: + - 'docs/**' + - 'site/**' + - 'iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/**' #queryengine + # allow manually run the action: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3 + MAVEN_ARGS: --batch-mode --no-transfer-progress + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + +jobs: + auto-create-schema: + strategy: + fail-fast: false + max-parallel: 15 + matrix: + java: [17] + # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet. + cluster: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode] + os: [ ubuntu-latest ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: liberica + java-version: ${{ matrix.java }} + - name: Cache Maven packages + uses: actions/cache@v4 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + - name: Sleep for a random duration between 0 and 10000 milliseconds + run: | + sleep $(( $(( RANDOM % 10000 + 1 )) / 1000)) + - name: IT Test + shell: bash + # we do not compile client-cpp for saving time, it is tested in client.yml + # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml + run: | + retry() { + local -i max_attempts=3 + local -i attempt=1 + local -i retry_sleep=5 + local test_output + + while [ $attempt -le $max_attempts ]; do + mvn clean verify \ + -P with-integration-tests \ + -DskipUTs \ + -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \ + -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \ + -pl integration-test \ + -am -PMultiClusterIT2AutoCreateSchema \ + -ntp >> ~/run-tests-$attempt.log && return 0 + test_output=$(cat ~/run-tests-$attempt.log) + + mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/ + + echo "==================== BEGIN: ~/run-tests-$attempt.log ====================" + echo "$test_output" + echo "==================== END: ~/run-tests-$attempt.log ======================" + + if echo "$test_output" | grep -q "Could not transfer artifact"; then + if [ $attempt -lt $max_attempts ]; then + echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..." + sleep $retry_sleep + attempt=$((attempt + 1)) + else + echo "Test failed after $max_attempts attempts due to artifact transfer issue." + echo "Treating this as a success because the issue is likely transient." + return 0 + fi + elif [ $? -ne 0 ]; then + echo "Test failed with a different error." + return 1 + else + echo "Tests passed" + return 0 + fi + done + } + retry + - name: Upload Artifact + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cluster-log-auto-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }} + path: integration-test/target/cluster-logs + retention-days: 30 + manual-create-schema: + strategy: + fail-fast: false + max-parallel: 15 + matrix: + java: [17] + # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet. + cluster1: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode] + cluster2: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode] + os: [ ubuntu-latest ] + exclude: + - cluster1: LightWeightStandaloneMode + cluster2: LightWeightStandaloneMode + - cluster1: LightWeightStandaloneMode + cluster2: ScalableSingleNodeMode + - cluster1: ScalableSingleNodeMode + cluster2: LightWeightStandaloneMode + - cluster1: ScalableSingleNodeMode + cluster2: HighPerformanceMode + - cluster1: HighPerformanceMode + cluster2: LightWeightStandaloneMode + - cluster1: HighPerformanceMode + cluster2: HighPerformanceMode + - cluster1: PipeConsensusBatchMode + cluster2: LightWeightStandaloneMode + - cluster1: PipeConsensusBatchMode + cluster2: HighPerformanceMode + - cluster1: PipeConsensusStreamMode + cluster2: LightWeightStandaloneMode + - cluster1: PipeConsensusStreamMode + cluster2: HighPerformanceMode + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: liberica + java-version: ${{ matrix.java }} + - name: Cache Maven packages + uses: actions/cache@v4 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + - name: Sleep for a random duration between 0 and 10000 milliseconds + run: | + sleep $(( $(( RANDOM % 10000 + 1 )) / 1000)) + - name: IT Test + shell: bash + # we do not compile client-cpp for saving time, it is tested in client.yml + # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml + run: | + retry() { + local -i max_attempts=3 + local -i attempt=1 + local -i retry_sleep=5 + local test_output + + while [ $attempt -le $max_attempts ]; do + mvn clean verify \ + -P with-integration-tests \ + -DskipUTs \ + -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \ + -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \ + -pl integration-test \ + -am -PMultiClusterIT2ManualCreateSchema \ + -ntp >> ~/run-tests-$attempt.log && return 0 + test_output=$(cat ~/run-tests-$attempt.log) + + mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/ + + echo "==================== BEGIN: ~/run-tests-$attempt.log ====================" + echo "$test_output" + echo "==================== END: ~/run-tests-$attempt.log ======================" + + if echo "$test_output" | grep -q "Could not transfer artifact"; then + if [ $attempt -lt $max_attempts ]; then + echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..." + sleep $retry_sleep + attempt=$((attempt + 1)) + else + echo "Test failed after $max_attempts attempts due to artifact transfer issue." + echo "Treating this as a success because the issue is likely transient." + return 0 + fi + elif [ $? -ne 0 ]; then + echo "Test failed with a different error." + return 1 + else + echo "Tests passed" + return 0 + fi + done + } + retry + - name: Upload Artifact + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cluster-log-manual-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }} + path: integration-test/target/cluster-logs + retention-days: 30 + subscription-arch-verification: + strategy: + fail-fast: false + max-parallel: 15 + matrix: + java: [ 17 ] + # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet. + cluster1: [ ScalableSingleNodeMode ] + cluster2: [ ScalableSingleNodeMode ] + os: [ ubuntu-latest ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: liberica + java-version: ${{ matrix.java }} + - name: Cache Maven packages + uses: actions/cache@v4 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + - name: Sleep for a random duration between 0 and 10000 milliseconds + run: | + sleep $(( $(( RANDOM % 10000 + 1 )) / 1000)) + - name: IT Test + shell: bash + # we do not compile client-cpp for saving time, it is tested in client.yml + # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml + run: | + retry() { + local -i max_attempts=3 + local -i attempt=1 + local -i retry_sleep=5 + local test_output + + while [ $attempt -le $max_attempts ]; do + mvn clean verify \ + -P with-integration-tests \ + -DskipUTs \ + -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \ + -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \ + -pl integration-test \ + -am -PMultiClusterIT2SubscriptionArchVerification \ + -ntp >> ~/run-tests-$attempt.log && return 0 + test_output=$(cat ~/run-tests-$attempt.log) + + mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/ + + echo "==================== BEGIN: ~/run-tests-$attempt.log ====================" + echo "$test_output" + echo "==================== END: ~/run-tests-$attempt.log ======================" + + if echo "$test_output" | grep -q "Could not transfer artifact"; then + if [ $attempt -lt $max_attempts ]; then + echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..." + sleep $retry_sleep + attempt=$((attempt + 1)) + else + echo "Test failed after $max_attempts attempts due to artifact transfer issue." + echo "Treating this as a success because the issue is likely transient." + return 0 + fi + elif [ $? -ne 0 ]; then + echo "Test failed with a different error." + return 1 + else + echo "Tests passed" + return 0 + fi + done + } + retry + - name: Upload Artifact + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cluster-log-subscription-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }} + path: integration-test/target/cluster-logs + retention-days: 30 + subscription-regression-consumer: + strategy: + fail-fast: false + max-parallel: 15 + matrix: + java: [ 17 ] + # do not use HighPerformanceMode here, otherwise some tests will cause the GH runner to receive a shutdown signal + cluster1: [ ScalableSingleNodeMode ] + cluster2: [ ScalableSingleNodeMode ] + os: [ ubuntu-latest ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: liberica + java-version: ${{ matrix.java }} + - name: Cache Maven packages + uses: actions/cache@v4 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + - name: Sleep for a random duration between 0 and 10000 milliseconds + run: | + sleep $(( $(( RANDOM % 10000 + 1 )) / 1000)) + - name: IT Test + shell: bash + # we do not compile client-cpp for saving time, it is tested in client.yml + # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml + run: | + retry() { + local -i max_attempts=3 + local -i attempt=1 + local -i retry_sleep=5 + local test_output + + while [ $attempt -le $max_attempts ]; do + mvn clean verify \ + -P with-integration-tests \ + -DskipUTs \ + -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \ + -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \ + -pl integration-test \ + -am -PMultiClusterIT2SubscriptionRegressionConsumer \ + -ntp >> ~/run-tests-$attempt.log && return 0 + test_output=$(cat ~/run-tests-$attempt.log) + + mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/ + + echo "==================== BEGIN: ~/run-tests-$attempt.log ====================" + echo "$test_output" + echo "==================== END: ~/run-tests-$attempt.log ======================" + + if echo "$test_output" | grep -q "Could not transfer artifact"; then + if [ $attempt -lt $max_attempts ]; then + echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..." + sleep $retry_sleep + attempt=$((attempt + 1)) + else + echo "Test failed after $max_attempts attempts due to artifact transfer issue." + echo "Treating this as a success because the issue is likely transient." + return 0 + fi + elif [ $? -ne 0 ]; then + echo "Test failed with a different error." + return 1 + else + echo "Tests passed" + return 0 + fi + done + } + retry + - name: Upload Artifact + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cluster-log-subscription-regression-consumer-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }} + path: integration-test/target/cluster-logs + retention-days: 30 + subscription-regression-misc: + strategy: + fail-fast: false + max-parallel: 15 + matrix: + java: [ 17 ] + # do not use HighPerformanceMode here, otherwise some tests will cause the GH runner to receive a shutdown signal + cluster1: [ ScalableSingleNodeMode ] + cluster2: [ ScalableSingleNodeMode ] + os: [ ubuntu-latest ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: liberica + java-version: ${{ matrix.java }} + - name: Cache Maven packages + uses: actions/cache@v4 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + - name: Sleep for a random duration between 0 and 10000 milliseconds + run: | + sleep $(( $(( RANDOM % 10000 + 1 )) / 1000)) + - name: IT Test + shell: bash + # we do not compile client-cpp for saving time, it is tested in client.yml + # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml + run: | + retry() { + local -i max_attempts=3 + local -i attempt=1 + local -i retry_sleep=5 + local test_output + + while [ $attempt -le $max_attempts ]; do + mvn clean verify \ + -P with-integration-tests \ + -DskipUTs \ + -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \ + -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \ + -pl integration-test \ + -am -PMultiClusterIT2SubscriptionRegressionMisc \ + -ntp >> ~/run-tests-$attempt.log && return 0 + test_output=$(cat ~/run-tests-$attempt.log) + + mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/ + + echo "==================== BEGIN: ~/run-tests-$attempt.log ====================" + echo "$test_output" + echo "==================== END: ~/run-tests-$attempt.log ======================" + + if echo "$test_output" | grep -q "Could not transfer artifact"; then + if [ $attempt -lt $max_attempts ]; then + echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..." + sleep $retry_sleep + attempt=$((attempt + 1)) + else + echo "Test failed after $max_attempts attempts due to artifact transfer issue." + echo "Treating this as a success because the issue is likely transient." + return 0 + fi + elif [ $? -ne 0 ]; then + echo "Test failed with a different error." + return 1 + else + echo "Tests passed" + return 0 + fi + done + } + retry + - name: Upload Artifact + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cluster-log-subscription-regression-misc-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }} + path: integration-test/target/cluster-logs + retention-days: 30 + table-model: + strategy: + fail-fast: false + max-parallel: 15 + matrix: + java: [17] + # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet. + cluster: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode] + os: [ ubuntu-latest ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: liberica + java-version: ${{ matrix.java }} + - name: Cache Maven packages + uses: actions/cache@v4 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + - name: Sleep for a random duration between 0 and 10000 milliseconds + run: | + sleep $(( $(( RANDOM % 10000 + 1 )) / 1000)) + - name: IT Test + shell: bash + # we do not compile client-cpp for saving time, it is tested in client.yml + # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml + run: | + retry() { + local -i max_attempts=3 + local -i attempt=1 + local -i retry_sleep=5 + local test_output + + while [ $attempt -le $max_attempts ]; do + mvn clean verify \ + -P with-integration-tests \ + -DskipUTs \ + -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \ + -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \ + -pl integration-test \ + -am -PMultiClusterIT2TableModel \ + -ntp >> ~/run-tests-$attempt.log && return 0 + test_output=$(cat ~/run-tests-$attempt.log) + + mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/ + + echo "==================== BEGIN: ~/run-tests-$attempt.log ====================" + echo "$test_output" + echo "==================== END: ~/run-tests-$attempt.log ======================" + + if echo "$test_output" | grep -q "Could not transfer artifact"; then + if [ $attempt -lt $max_attempts ]; then + echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..." + sleep $retry_sleep + attempt=$((attempt + 1)) + else + echo "Test failed after $max_attempts attempts due to artifact transfer issue." + echo "Treating this as a success because the issue is likely transient." + return 0 + fi + elif [ $? -ne 0 ]; then + echo "Test failed with a different error." + return 1 + else + echo "Tests passed" + return 0 + fi + done + } + retry + - name: Upload Artifact + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cluster-log-table-model-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }} + path: integration-test/target/cluster-logs + retention-days: 30 \ No newline at end of file diff --git a/integration-test/src/test/java/org/apache/iotdb/pipe/it/tablemodel/IoTDBPipeProtocolIT.java b/integration-test/src/test/java/org/apache/iotdb/pipe/it/tablemodel/IoTDBPipeProtocolIT.java index 046c5f68fece..79e97ac1b788 100644 --- a/integration-test/src/test/java/org/apache/iotdb/pipe/it/tablemodel/IoTDBPipeProtocolIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/pipe/it/tablemodel/IoTDBPipeProtocolIT.java @@ -355,11 +355,6 @@ public void testAsyncConnectorUseNodeUrls() throws Exception { doTestUseNodeUrls(BuiltinPipePlugin.IOTDB_THRIFT_ASYNC_CONNECTOR.getPipePluginName()); } - @Test - public void testAirGapConnectorUseNodeUrls() throws Exception { - doTestUseNodeUrls(BuiltinPipePlugin.IOTDB_AIR_GAP_CONNECTOR.getPipePluginName()); - } - private void doTestUseNodeUrls(String connectorName) throws Exception { senderEnv .getConfig() @@ -401,16 +396,7 @@ private void doTestUseNodeUrls(String connectorName) throws Exception { boolean insertResult = true; for (final DataNodeWrapper wrapper : receiverEnv.getDataNodeWrapperList()) { - if (connectorName.equals(BuiltinPipePlugin.IOTDB_AIR_GAP_CONNECTOR.getPipePluginName())) { - // Use default port for convenience - nodeUrlsBuilder - .append(wrapper.getIp()) - .append(":") - .append(wrapper.getPipeAirGapReceiverPort()) - .append(","); - } else { - nodeUrlsBuilder.append(wrapper.getIpAndPortString()).append(","); - } + nodeUrlsBuilder.append(wrapper.getIpAndPortString()).append(","); } try (final SyncConfigNodeIServiceClient client =