Skip to content

Reduced the error log cause by batch activate template when some devices are already activated #15295

Reduced the error log cause by batch activate template when some devices are already activated

Reduced the error log cause by batch activate template when some devices are already activated #15295

name: Multi-Cluster IT
on:
push:
branches:
- master
- 'rel/1.*'
- 'rc/1.*'
- 'force_ci/**'
paths-ignore:
- 'docs/**'
- 'site/**'
- 'iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/**' #queryengine
pull_request:
branches:
- master
- 'rel/1.*'
- 'rc/1.*'
- 'force_ci/**'
paths-ignore:
- 'docs/**'
- 'site/**'
- 'iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/**' #queryengine
# allow manually run the action:
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
MAVEN_ARGS: --batch-mode --no-transfer-progress
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
jobs:
auto-create-schema:
strategy:
fail-fast: false
max-parallel: 15
matrix:
java: [17]
# StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
cluster: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
os: [ ubuntu-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: liberica
java-version: ${{ matrix.java }}
- name: Cache Maven packages
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-
- name: Sleep for a random duration between 0 and 10000 milliseconds
run: |
sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
- name: IT Test
shell: bash
# we do not compile client-cpp for saving time, it is tested in client.yml
# we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
run: |
retry() {
local -i max_attempts=3
local -i attempt=1
local -i retry_sleep=5
local test_output
while [ $attempt -le $max_attempts ]; do
mvn clean verify \
-P with-integration-tests \
-DskipUTs \
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
-pl integration-test \
-am -PMultiClusterIT2AutoCreateSchema \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)
mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/
echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
echo "$test_output"
echo "==================== END: ~/run-tests-$attempt.log ======================"
if echo "$test_output" | grep -q "Could not transfer artifact"; then
if [ $attempt -lt $max_attempts ]; then
echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
sleep $retry_sleep
attempt=$((attempt + 1))
else
echo "Test failed after $max_attempts attempts due to artifact transfer issue."
echo "Treating this as a success because the issue is likely transient."
return 0
fi
elif [ $? -ne 0 ]; then
echo "Test failed with a different error."
return 1
else
echo "Tests passed"
return 0
fi
done
}
retry
- name: Upload Artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: cluster-log-auto-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
path: integration-test/target/cluster-logs
retention-days: 30
manual-create-schema:
strategy:
fail-fast: false
max-parallel: 15
matrix:
java: [17]
# StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
cluster1: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
cluster2: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode]
os: [ ubuntu-latest ]
exclude:
- cluster1: LightWeightStandaloneMode
cluster2: LightWeightStandaloneMode
- cluster1: LightWeightStandaloneMode
cluster2: ScalableSingleNodeMode
- cluster1: ScalableSingleNodeMode
cluster2: LightWeightStandaloneMode
- cluster1: ScalableSingleNodeMode
cluster2: HighPerformanceMode
- cluster1: HighPerformanceMode
cluster2: LightWeightStandaloneMode
- cluster1: HighPerformanceMode
cluster2: HighPerformanceMode
- cluster1: PipeConsensusBatchMode
cluster2: LightWeightStandaloneMode
- cluster1: PipeConsensusBatchMode
cluster2: HighPerformanceMode
- cluster1: PipeConsensusStreamMode
cluster2: LightWeightStandaloneMode
- cluster1: PipeConsensusStreamMode
cluster2: HighPerformanceMode
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: liberica
java-version: ${{ matrix.java }}
- name: Cache Maven packages
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-
- name: Sleep for a random duration between 0 and 10000 milliseconds
run: |
sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
- name: IT Test
shell: bash
# we do not compile client-cpp for saving time, it is tested in client.yml
# we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
run: |
retry() {
local -i max_attempts=3
local -i attempt=1
local -i retry_sleep=5
local test_output
while [ $attempt -le $max_attempts ]; do
mvn clean verify \
-P with-integration-tests \
-DskipUTs \
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
-pl integration-test \
-am -PMultiClusterIT2ManualCreateSchema \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)
mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/
echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
echo "$test_output"
echo "==================== END: ~/run-tests-$attempt.log ======================"
if echo "$test_output" | grep -q "Could not transfer artifact"; then
if [ $attempt -lt $max_attempts ]; then
echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
sleep $retry_sleep
attempt=$((attempt + 1))
else
echo "Test failed after $max_attempts attempts due to artifact transfer issue."
echo "Treating this as a success because the issue is likely transient."
return 0
fi
elif [ $? -ne 0 ]; then
echo "Test failed with a different error."
return 1
else
echo "Tests passed"
return 0
fi
done
}
retry
- name: Upload Artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: cluster-log-manual-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
path: integration-test/target/cluster-logs
retention-days: 30
subscription-arch-verification:
strategy:
fail-fast: false
max-parallel: 15
matrix:
java: [ 17 ]
# StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
cluster1: [ ScalableSingleNodeMode ]
cluster2: [ ScalableSingleNodeMode ]
os: [ ubuntu-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: liberica
java-version: ${{ matrix.java }}
- name: Cache Maven packages
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-
- name: Sleep for a random duration between 0 and 10000 milliseconds
run: |
sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
- name: IT Test
shell: bash
# we do not compile client-cpp for saving time, it is tested in client.yml
# we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
run: |
retry() {
local -i max_attempts=3
local -i attempt=1
local -i retry_sleep=5
local test_output
while [ $attempt -le $max_attempts ]; do
mvn clean verify \
-P with-integration-tests \
-DskipUTs \
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
-pl integration-test \
-am -PMultiClusterIT2SubscriptionArchVerification \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)
mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/
echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
echo "$test_output"
echo "==================== END: ~/run-tests-$attempt.log ======================"
if echo "$test_output" | grep -q "Could not transfer artifact"; then
if [ $attempt -lt $max_attempts ]; then
echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
sleep $retry_sleep
attempt=$((attempt + 1))
else
echo "Test failed after $max_attempts attempts due to artifact transfer issue."
echo "Treating this as a success because the issue is likely transient."
return 0
fi
elif [ $? -ne 0 ]; then
echo "Test failed with a different error."
return 1
else
echo "Tests passed"
return 0
fi
done
}
retry
- name: Upload Artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: cluster-log-subscription-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
path: integration-test/target/cluster-logs
retention-days: 30
subscription-regression-consumer:
strategy:
fail-fast: false
max-parallel: 15
matrix:
java: [ 17 ]
# do not use HighPerformanceMode here, otherwise some tests will cause the GH runner to receive a shutdown signal
cluster1: [ ScalableSingleNodeMode ]
cluster2: [ ScalableSingleNodeMode ]
os: [ ubuntu-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: liberica
java-version: ${{ matrix.java }}
- name: Cache Maven packages
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-
- name: Sleep for a random duration between 0 and 10000 milliseconds
run: |
sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
- name: IT Test
shell: bash
# we do not compile client-cpp for saving time, it is tested in client.yml
# we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
run: |
retry() {
local -i max_attempts=3
local -i attempt=1
local -i retry_sleep=5
local test_output
while [ $attempt -le $max_attempts ]; do
mvn clean verify \
-P with-integration-tests \
-DskipUTs \
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
-pl integration-test \
-am -PMultiClusterIT2SubscriptionRegressionConsumer \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)
mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/
echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
echo "$test_output"
echo "==================== END: ~/run-tests-$attempt.log ======================"
if echo "$test_output" | grep -q "Could not transfer artifact"; then
if [ $attempt -lt $max_attempts ]; then
echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
sleep $retry_sleep
attempt=$((attempt + 1))
else
echo "Test failed after $max_attempts attempts due to artifact transfer issue."
echo "Treating this as a success because the issue is likely transient."
return 0
fi
elif [ $? -ne 0 ]; then
echo "Test failed with a different error."
return 1
else
echo "Tests passed"
return 0
fi
done
}
retry
- name: Upload Artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: cluster-log-subscription-regression-consumer-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
path: integration-test/target/cluster-logs
retention-days: 30
subscription-regression-misc:
strategy:
fail-fast: false
max-parallel: 15
matrix:
java: [ 17 ]
# do not use HighPerformanceMode here, otherwise some tests will cause the GH runner to receive a shutdown signal
cluster1: [ ScalableSingleNodeMode ]
cluster2: [ ScalableSingleNodeMode ]
os: [ ubuntu-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: liberica
java-version: ${{ matrix.java }}
- name: Cache Maven packages
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-
- name: Sleep for a random duration between 0 and 10000 milliseconds
run: |
sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
- name: IT Test
shell: bash
# we do not compile client-cpp for saving time, it is tested in client.yml
# we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
run: |
retry() {
local -i max_attempts=3
local -i attempt=1
local -i retry_sleep=5
local test_output
while [ $attempt -le $max_attempts ]; do
mvn clean verify \
-P with-integration-tests \
-DskipUTs \
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
-pl integration-test \
-am -PMultiClusterIT2SubscriptionRegressionMisc \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)
mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/
echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
echo "$test_output"
echo "==================== END: ~/run-tests-$attempt.log ======================"
if echo "$test_output" | grep -q "Could not transfer artifact"; then
if [ $attempt -lt $max_attempts ]; then
echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
sleep $retry_sleep
attempt=$((attempt + 1))
else
echo "Test failed after $max_attempts attempts due to artifact transfer issue."
echo "Treating this as a success because the issue is likely transient."
return 0
fi
elif [ $? -ne 0 ]; then
echo "Test failed with a different error."
return 1
else
echo "Tests passed"
return 0
fi
done
}
retry
- name: Upload Artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: cluster-log-subscription-regression-misc-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
path: integration-test/target/cluster-logs
retention-days: 30
table-model:
strategy:
fail-fast: false
max-parallel: 15
matrix:
java: [17]
# StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
cluster: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
os: [ ubuntu-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: liberica
java-version: ${{ matrix.java }}
- name: Cache Maven packages
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-
- name: Sleep for a random duration between 0 and 10000 milliseconds
run: |
sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
- name: IT Test
shell: bash
# we do not compile client-cpp for saving time, it is tested in client.yml
# we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
run: |
retry() {
local -i max_attempts=3
local -i attempt=1
local -i retry_sleep=5
local test_output
while [ $attempt -le $max_attempts ]; do
mvn clean verify \
-P with-integration-tests \
-DskipUTs \
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
-pl integration-test \
-am -PMultiClusterIT2TableModel \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)
mv ~/run-tests-$attempt.log integration-test/target/cluster-logs/
echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
echo "$test_output"
echo "==================== END: ~/run-tests-$attempt.log ======================"
if echo "$test_output" | grep -q "Could not transfer artifact"; then
if [ $attempt -lt $max_attempts ]; then
echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
sleep $retry_sleep
attempt=$((attempt + 1))
else
echo "Test failed after $max_attempts attempts due to artifact transfer issue."
echo "Treating this as a success because the issue is likely transient."
return 0
fi
elif [ $? -ne 0 ]; then
echo "Test failed with a different error."
return 1
else
echo "Tests passed"
return 0
fi
done
}
retry
- name: Upload Artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: cluster-log-table-model-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
path: integration-test/target/cluster-logs
retention-days: 30