diff --git a/.github/actions/run-e2e-test/action.yml b/.github/actions/run-e2e-test/action.yml index 96e291a882..d54f993718 100644 --- a/.github/actions/run-e2e-test/action.yml +++ b/.github/actions/run-e2e-test/action.yml @@ -68,10 +68,6 @@ runs: shell: bash run: sleep 60 - - name: Display bootnode logs - shell: bash - run: docker logs Node0 --follow & - - name: Download artifact with the test suite image if: inputs.test-case != '' uses: actions/download-artifact@v3 @@ -84,6 +80,7 @@ runs: run: docker load -i aleph-e2e-client.tar - name: Run single e2e test + id: run-single-e2e-test if: inputs.test-case != '' shell: bash run: | @@ -127,6 +124,31 @@ runs: contracts/scripts/clean.sh fi + - name: Get log tarball file name + if: ${{ failure() }} + id: get-log-tarball-file-name + shell: bash + run: | + test_case_escaped=$(echo ${{ inputs.test-case }} | sed 's/::/-/g') + echo "name=${test_case_escaped}" >> $GITHUB_OUTPUT + + - name: Archive logs from failed e2e test + if: ${{ failure() }} + shell: bash + run: | + ./.github/scripts/run_consensus.sh -n '${{ inputs.node-count }}' \ + --archive-logs \ + "aleph-node-${{ steps.get-log-tarball-file-name.outputs.name }}-e2e-failure.tgz" + + - name: Upload logs from failed e2e test + if: ${{ failure() }} + uses: actions/upload-artifact@v3 + with: + path: aleph-node-${{ steps.get-log-tarball-file-name.outputs.name }}-e2e-failure.tgz + name: aleph-node-${{ steps.get-log-tarball-file-name.outputs.name }}-e2e-failure.tgz + if-no-files-found: error + retention-days: 7 + - name: Run finalization e2e test if: inputs.follow-up-finalization-check == 'true' shell: bash diff --git a/.github/scripts/run_consensus.sh b/.github/scripts/run_consensus.sh index ad8607f326..b3870e6365 100755 --- a/.github/scripts/run_consensus.sh +++ b/.github/scripts/run_consensus.sh @@ -20,6 +20,7 @@ MIN_VALIDATOR_COUNT=${MIN_VALIDATOR_COUNT:-4} DOCKER_COMPOSE=${DOCKER_COMPOSE:-docker/docker-compose.yml} OVERRIDE_DOCKER_COMPOSE=${OVERRIDE_DOCKER_COMPOSE:-""} NODE_IMAGE=${NODE_IMAGE:-"aleph-node:latest"} +LOGS_OUTPUT_FILE=${LOGS_OUTPUT_FILE:=""} # ------------------------ argument parsing and usage ----------------------- @@ -29,6 +30,8 @@ Usage $0 [-n|--node-count NODE_COUNT] number of nodes to run + [-a|--archive-logs LOGS_OUTPUT_FILE] + archive logs from all nodes in a tarball file; when this is given, no nodes are run EOF exit 0 } @@ -42,6 +45,10 @@ while [[ $# -gt 0 ]]; do NODE_COUNT="$2" shift 2 ;; + -a|--archive-logs) + LOGS_OUTPUT_FILE="$2" + shift 2 + ;; *) echo "Unrecognized argument $1!" usage @@ -92,6 +99,18 @@ function generate_bootnode_peer_id() { -c "aleph-node key inspect-node-key --file /data/${bootnode_account}/p2p_secret") } +function get_compose_file_list() { + set +u + local docker_compose_file="${1}" + local override_file="${2}" + return_list=("-f" $(realpath "${docker_compose_file}")) + if [[ -n "${override_file}" ]]; then + return_list+=("-f" $(realpath "${override_file}")) + fi + echo ${return_list[@]} + set -u +} + function run_containers() { local authorities_count="$1" local docker_compose_file="$2" @@ -103,11 +122,26 @@ function run_containers() { for index in $(seq 0 "${authorities_count}"); do containers+=("Node${index}") done - if [[ -z ${override_file} ]]; then - docker-compose -f "${docker_compose_file}" up -d "${containers[@]}" - else - docker-compose -f "${docker_compose_file}" -f "${override_file}" up -d "${containers[@]}" - fi + docker-compose $(get_compose_file_list "${docker_compose_file}" "${override_file}") up -d "${containers[@]}" +} + +function archive_logs() { + local tarball_output=$(realpath "${1}") + local node_count="${2}" + local docker_compose_file="${3}" + local override_file="${4}" + + local compose_file_list=$(get_compose_file_list "${docker_compose_file}" "${override_file}") + + echo "Archiving all logs from ${node_count} nodes to a file ${tarball_output}..." + pushd $(mktemp -d) > /dev/null + for index in $(seq 0 "${node_count}"); do + echo "Archiving "Node${index}" logs..." + docker-compose ${compose_file_list} logs --no-color --no-log-prefix "Node${index}" > "Node${index}.log" + done + tar -czf "${tarball_output}" Node* + popd > /dev/null + echo "Done" } # --------------------------------- main script -------------------------------------------- @@ -119,6 +153,11 @@ script_dir=$(dirname "${script_path}") aleph_node_root_dir=$(realpath "${script_dir}/../..") pushd "${aleph_node_root_dir}" > /dev/null +if [[ -n "${LOGS_OUTPUT_FILE}" ]]; then + archive_logs "${LOGS_OUTPUT_FILE}" "${NODE_COUNT}" "${DOCKER_COMPOSE}" "${OVERRIDE_DOCKER_COMPOSE}" + exit 0 +fi + if docker inspect ${NODE_IMAGE} > /dev/null; then echo "aleph-node image tag ${NODE_IMAGE} found locally" else diff --git a/.github/scripts/run_e2e_test.sh b/.github/scripts/run_e2e_test.sh index e56db52b0d..7c1535ace5 100755 --- a/.github/scripts/run_e2e_test.sh +++ b/.github/scripts/run_e2e_test.sh @@ -97,7 +97,21 @@ fi if [[ -n "${OUT_LATENCY:-}" ]]; then ARGS+=(-e OUT_LATENCY) fi - -docker run -v "$(pwd)/contracts:/contracts" -v "$(pwd)/docker/data:/data" "${ARGS[@]}" aleph-e2e-client:latest - -exit $? +timeout_duration="15m" +echo "Running test, logs will be shown when tests finishes or after ${timeout_duration} timeout." +# a hack to set global timeout on a e2e testcase run +# we can't do that on GH yaml level due to https://github.com/actions/runner/issues/1979 +docker_service=$(docker run -v "$(pwd)/contracts:/contracts" -v "$(pwd)/docker/data:/data" -d "${ARGS[@]}" \ + aleph-e2e-client:latest) +set +e +timeout_output=$(timeout "${timeout_duration}" docker wait "${docker_service}") +docker_exit_code=$? +# timeout returns 124 exit code if command times out +# otherwise, docker wait finishes and it prints docker service exit code on stdout +if [[ "${docker_exit_code}" != 124 ]]; then + docker_exit_code="${timeout_output}" +fi +echo "Test exited with exit code ${docker_exit_code}" +echo "Logs from test:" +docker logs "${docker_service}" +exit "${docker_exit_code}" diff --git a/.github/workflows/_run-e2e-tests.yml b/.github/workflows/_run-e2e-tests.yml index 96725a6217..0f252584b8 100644 --- a/.github/workflows/_run-e2e-tests.yml +++ b/.github/workflows/_run-e2e-tests.yml @@ -16,7 +16,6 @@ jobs: uses: ./.github/actions/run-e2e-test with: test-case: finalization::finalization - timeout-minutes: 3 run-e2e-rewards-disable-node-test: name: Run e2e reward points - disable node test @@ -31,7 +30,6 @@ jobs: with: test-case: rewards::disable_node follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-token-transfer-test: @@ -46,7 +44,6 @@ jobs: uses: ./.github/actions/run-e2e-test with: test-case: token_transfer - timeout-minutes: 20 run-e2e-fee-calculation-test: name: Run e2e fee calculation test @@ -60,7 +57,6 @@ jobs: uses: ./.github/actions/run-e2e-test with: test-case: fee_calculation - timeout-minutes: 20 run-e2e-channeling-fee-test: name: Run e2e channeling fee test @@ -74,7 +70,6 @@ jobs: uses: ./.github/actions/run-e2e-test with: test-case: channeling_fee_and_tip - timeout-minutes: 20 run-e2e-treasury-access-test: name: Run e2e treasury access test @@ -88,7 +83,6 @@ jobs: uses: ./.github/actions/run-e2e-test with: test-case: treasury_access - timeout-minutes: 20 run-e2e-batch-transactions-test: name: Run e2e batch transactions test @@ -102,7 +96,6 @@ jobs: uses: ./.github/actions/run-e2e-test with: test-case: batch_transactions - timeout-minutes: 20 run-e2e-staking-era-payouts-test: name: Run e2e staking era payouts test @@ -117,7 +110,6 @@ jobs: with: test-case: staking_era_payouts follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-staking-new-validator-test: @@ -133,7 +125,6 @@ jobs: with: test-case: staking_new_validator follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-change-validators-test: @@ -149,7 +140,6 @@ jobs: with: test-case: change_validators follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-fail-change-validators-test: name: Run e2e fail change validators test @@ -164,7 +154,6 @@ jobs: with: test-case: fail_changing_validators follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-validators-rotate: name: Run validators rotation test @@ -179,7 +168,6 @@ jobs: with: test-case: validators_rotate follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-era-payout: name: Run era payout test @@ -194,7 +182,6 @@ jobs: with: test-case: era_payout::era_payout follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-era-validators: name: Run era validators test @@ -209,7 +196,6 @@ jobs: with: test-case: era_validators follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-rewards-force-new-era: name: Run force new era test to check rewards @@ -224,7 +210,6 @@ jobs: with: test-case: rewards::force_new_era follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-rewards-stake-change: name: Run reward points with stake changed test @@ -239,7 +224,6 @@ jobs: with: test-case: rewards::points_stake_change follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-rewards-change-stake-force-new-era: name: Run reward points with stake changed and new era forced test @@ -254,7 +238,6 @@ jobs: with: test-case: rewards::change_stake_and_force_new_era follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-rewards-points-basic: name: Run basic reward points calculation test @@ -269,7 +252,6 @@ jobs: with: test-case: points_basic follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-authorities-are-staking: name: Run authorities are staking test @@ -287,7 +269,6 @@ jobs: reserved-seats: 3 non-reserved-seats: 3 follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-ban-automatic: name: Run ban automatic test @@ -302,7 +283,6 @@ jobs: with: test-case: ban_automatic follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-ban-manual: name: Run ban manual test @@ -317,7 +297,6 @@ jobs: with: test-case: ban_manual follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-ban-counter-clearing: name: Run ban counter clearing test @@ -332,7 +311,6 @@ jobs: with: test-case: clearing_session_count follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-ban-threshold: name: Run ban threshold test @@ -347,7 +325,6 @@ jobs: with: test-case: ban_threshold follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-permissionless-ban: name: Run permissionless ban test @@ -362,7 +339,6 @@ jobs: with: test-case: permissionless_ban follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-version-upgrade: name: Run basic (positive) version-upgrade test @@ -380,7 +356,6 @@ jobs: UPGRADE_VERSION: 1 UPGRADE_SESSION: 3 UPGRADE_FINALIZATION_WAIT_SESSIONS: 2 - timeout-minutes: 20 run-e2e-adder-contract-test: name: Run e2e adder contract test @@ -401,7 +376,6 @@ jobs: with: deploy-adder: true test-case: adder - timeout-minutes: 20 run-e2e-finality-version-change: name: Run finality version change test @@ -416,7 +390,6 @@ jobs: with: test-case: finality_version::finality_version_change follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-committee-split-reserved-01: name: Run committee split test with node-0 and node-1 dead @@ -433,7 +406,6 @@ jobs: with: test-case: committee_split::split_test_reserved_01 node-count: 7 - timeout-minutes: 20 run-e2e-committee-split-reserved-12: name: Run committee split test with node-1 and node-2 dead @@ -450,7 +422,6 @@ jobs: with: test-case: committee_split::split_test_reserved_12 node-count: 7 - timeout-minutes: 20 run-e2e-committee-split-reserved-02: name: Run committee split test with node-0 and node-2 dead @@ -467,7 +438,6 @@ jobs: with: test-case: committee_split::split_test_reserved_02 node-count: 7 - timeout-minutes: 20 run-e2e-committee-split-test-success-without-any-deads: name: Run committee split test without any deads @@ -485,7 +455,6 @@ jobs: test-case: committee_split::split_test_success_without_any_deads follow-up-finalization-check: true node-count: 7 - timeout-minutes: 20 run-e2e-committee-split-test-success-with-one-dead: name: Run committee split test with one node dead @@ -503,7 +472,6 @@ jobs: test-case: committee_split::split_test_success_with_one_dead follow-up-finalization-check: true node-count: 7 - timeout-minutes: 20 run-e2e-set-emergency-finalizer: name: Run set emergency finalizer test @@ -518,7 +486,6 @@ jobs: with: test-case: set_emergency_finalizer_test follow-up-finalization-check: true - timeout-minutes: 20 run-e2e-set-lenient-threshold: name: Run set lenient threshold test @@ -533,7 +500,6 @@ jobs: with: test-case: set_lenient_threshold_test follow-up-finalization-check: true - timeout-minutes: 5 run-e2e-chain-dead-scenario: needs: [run-e2e-finalization-test] @@ -551,7 +517,6 @@ jobs: test-case: chain_dead_scenario follow-up-finalization-check: true node-count: 6 - timeout-minutes: 20 run-e2e-committee-split-test-success-with-all-non-reserved-dead: name: Run committee split test with all non-reserved nodes dead @@ -569,4 +534,3 @@ jobs: test-case: committee_split::split_test_success_with_all_non_reserved_dead follow-up-finalization-check: true node-count: 7 - timeout-minutes: 20 diff --git a/docker/common.yml b/docker/common.yml index 83ceee7a9e..8f215087d7 100644 --- a/docker/common.yml +++ b/docker/common.yml @@ -4,7 +4,7 @@ services: environment: - CUSTOM_ARGS=-laleph-party=debug,-laleph-network=debug,-lnetwork-clique=debug,-laleph-finality=debug,-laleph-justification=debug,-laleph-data-store=debug,-laleph-updater=debug,-laleph-metrics=debug - PURGE_BEFORE_START=true - - RUST_LOG=info + - RUST_LOG=debug - CHAIN=/data/chainspec.json - ALLOW_PRIVATE_IPV4=true - DISCOVER_LOCAL=true