diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 63fddfa..de2344a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,5 +1,5 @@
 # https://help.github.com/en/actions/reference/workflow-syntax-for-github-actions
-name: Build env
+name: Build environment
 
 # name of GitHub event that triggers workflow
 # https://help.github.com/en/actions/reference/events-that-trigger-workflows#watch-event-watch
@@ -44,7 +44,6 @@ env:
   EMAIL_ADDRESS: certadmin@domain.com
   ENABLE_TLS_INGRESS: true
   FORCE_TEST_FAIL: false
-  HAS_SUBDOMAIN: true
   K8S_TLS_SECRET_NAME: tls-secret
   KEY_VAULT_NAME: kv-rush-iz6y
   KEY_VAULT_CERT_NAME: wildcard-thehypepipe-co-uk
@@ -107,6 +106,12 @@ jobs:
       - name: Login to Azure
         run: ./scripts/azure_login.sh
 
+      # Required when developing after the initial build, as the AKS cluster may have been stopped
+      # Ensure the AKS cluster is running, else timeouts will occur on k8s Terraform apply tasks
+      - name: Start AKS Cluster
+        continue-on-error: true
+        run: ./scripts/start_aks_cluster.sh
+
       # Prereqs
       - name: Create Storage Account for Terraform state
@@ -209,8 +214,15 @@ jobs:
         run: ./scripts/push_docker_images.sh
 
+      # TODO: Remove once issue has been fixed
+      # https://github.com/terraform-providers/terraform-provider-azurerm/issues/8546
+      - name: Restart Function App
+        run: az functionapp restart --name "${{ env.PREFIX }}-funcapp" --resource-group "${{ env.PREFIX }}-rg-function-app"
+
       # Pester tests
       - name: Run Pester tests
+        continue-on-error: true
         run: pwsh -command "./scripts/Start-Test.ps1"
 
       - name: Archive test artifacts
@@ -218,6 +230,7 @@ jobs:
         with:
           name: test results
           path: test/pester-test-results.xml
+        # TODO: only run when the previous task (Pester tests) has been successful
        if: always()
 
      # remove NuGet proxy repo so pester report step doesnt fail
@@ -226,6 +239,8 @@ jobs:
 
       # Shows at the bottom of a run: https://github.com/adamrushuk/aks-nexus-velero/runs/1035347513?check_suite_focus=true
       - name: Pester report
+        # TODO: remove continue-on-error once bug is fixed
+        continue-on-error: true
         uses: zyborg/pester-tests-report@v1.3.2
         with:
           test_results_path: test/pester-test-results.xml
@@ -237,11 +252,9 @@ jobs:
       # Notify
       - name: Notify slack
         env:
+          SLACK_CHANNEL_ID: ${{ secrets.SLACK_CHANNEL_ID }}
           SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-        uses: pullreminders/slack-action@v1.0.9
-        with:
-          # env var concatenation not supported atm, so hard-code messages
-          args: '{\"channel\":\"C012ZQHT9A4\",\"text\":\"[aks-nexus-velero] Build complete\"}'
+        run: ./scripts/send_slack_message.sh "[aks-nexus-velero] Build complete"
 
   # used for any windows-only tasks
diff --git a/.github/workflows/destroy.yml b/.github/workflows/destroy.yml
index 9ed843a..e209c15 100644
--- a/.github/workflows/destroy.yml
+++ b/.github/workflows/destroy.yml
@@ -37,7 +37,6 @@ env:
   EMAIL_ADDRESS: certadmin@domain.com
   ENABLE_TLS_INGRESS: true
   FORCE_TEST_FAIL: false
-  HAS_SUBDOMAIN: true
   K8S_TLS_SECRET_NAME: tls-secret
   KEY_VAULT_NAME: kv-rush-iz6y
   KEY_VAULT_CERT_NAME: wildcard-thehypepipe-co-uk
@@ -68,6 +67,8 @@ jobs:
     steps:
       # Checkout
+      # Reference the major version of a release
+      # https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-syntax-for-github-actions#example-using-versioned-actions
       - uses: actions/checkout@v2
       # specify different branch
       # NOT required as I've changed the default branch to develop
@@ -92,23 +93,13 @@ jobs:
         env:
           GITHUB_CONTEXT: ${{ toJson(github) }}
 
-      # # Trigger workflow via Repository Dispatch
-      # - name: Trigger start_aks_vmss Workflow
-      #   uses: peter-evans/repository-dispatch@v1
-      #   with:
-      #     token: ${{ secrets.REPO_ACCESS_TOKEN }}
-      #     repository: adamrushuk/aks-nexus-velero
-      #     event-type: start_vmss
-      #     client-payload: '{"ref": "${{ github.ref }}", "sha": "${{ github.sha }}"}'
-
       # Login
       - name: Login to Azure
         run: ./scripts/azure_login.sh
 
-      # Ensure VMSS is started, else timeouts will occur on k8s Terraform resource destroy tasks
-      - name: Start AKS VMSS
-        run: ./scripts/start_aks_vmss.sh
-
+      # Ensure the AKS cluster is running, else timeouts will occur on k8s Terraform resource destroy tasks
+      - name: Start AKS Cluster
+        run: ./scripts/start_aks_cluster.sh
 
       # Prereqs
       - name: Lookup Storage Key
@@ -122,28 +113,24 @@ jobs:
       - name: Create zip file of Function App
         run: pwsh -command "./function_app/CreateFunctionAppZip.ps1"
 
-
       # Terraform
       - uses: hashicorp/setup-terraform@v1
         with:
           terraform_version: ${{ env.TF_VERSION }}
+
       - name: Terraform destroy
         run: |
           terraform init
           terraform destroy -no-color -auto-approve
         working-directory: ${{ env.TF_WORKING_DIR }}
 
-
       # Cleanup
       - name: Delete Storage
         run: ./scripts/storage_delete.sh
 
-
       # Notify
       - name: Notify slack
         env:
+          SLACK_CHANNEL_ID: ${{ secrets.SLACK_CHANNEL_ID }}
           SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-        uses: pullreminders/slack-action@v1.0.9
-        with:
-          # env var concatenation not supported atm, so hard-code messages
-          args: '{\"channel\":\"C012ZQHT9A4\",\"text\":\"[aks-nexus-velero] Destroy complete\"}'
+        run: ./scripts/send_slack_message.sh "[aks-nexus-velero] Destroy complete"
diff --git a/.github/workflows/start_aks_vmss.yml b/.github/workflows/start_aks_cluster.yml
similarity index 73%
rename from .github/workflows/start_aks_vmss.yml
rename to .github/workflows/start_aks_cluster.yml
index 04d7d12..13a16eb 100644
--- a/.github/workflows/start_aks_vmss.yml
+++ b/.github/workflows/start_aks_cluster.yml
@@ -1,9 +1,9 @@
-name: Start AKS VMSS
+name: Start AKS Cluster
 
 on:
   repository_dispatch:
     # name of GitHub event that triggers workflow
-    types: [start_vmss]
+    types: [start_aks_cluster]
 
   # enable manual workflow
   # https://docs.github.com/en/actions/configuring-and-managing-workflows/configuring-a-workflow#manually-running-a-workflow
@@ -25,7 +25,7 @@ env:
   ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
 
 jobs:
-  start_vmss:
+  start_aks_cluster:
     runs-on: ubuntu-18.04
 
     # only run if owner triggered action
@@ -46,25 +46,18 @@ jobs:
           echo "AKS_RG_NAME=${{ env.PREFIX }}-rg-aks-dev-001" >> $GITHUB_ENV
           echo "AKS_CLUSTER_NAME=${{ env.PREFIX }}-aks-001" >> $GITHUB_ENV
 
-      # Show event info
-      - name: Show triggered event data
-        run: pwsh -command "./scripts/Get-EventData.ps1"
-        env:
-          GITHUB_CONTEXT: ${{ toJson(github) }}
-
       # Login
       - name: Login to Azure
         run: ./scripts/azure_login.sh
 
       # Start
-      - name: Start AKS VMSS
-        run: ./scripts/start_aks_vmss.sh
+      # Prereqs: https://docs.microsoft.com/en-us/azure/aks/start-stop-cluster
+      - name: Start AKS Cluster
+        run: ./scripts/start_aks_cluster.sh
 
       # Notify
       - name: Notify slack
         env:
+          SLACK_CHANNEL_ID: ${{ secrets.SLACK_CHANNEL_ID }}
           SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-        uses: pullreminders/slack-action@v1.0.9
-        with:
-          # env var concatenation not supported atm, so hard-code messages
-          args: '{\"channel\":\"C012ZQHT9A4\",\"text\":\"[aks-nexus-velero] Start AKS VMSS complete\"}'
+        run: ./scripts/send_slack_message.sh "[aks-nexus-velero] Start AKS Cluster complete"
diff --git a/.github/workflows/deallocate_aks_vmss.yml b/.github/workflows/stop_aks_cluster.yml
similarity index 71%
rename from .github/workflows/deallocate_aks_vmss.yml
rename to .github/workflows/stop_aks_cluster.yml
index 161e4d3..b7259d7 100644
--- a/.github/workflows/deallocate_aks_vmss.yml
+++ b/.github/workflows/stop_aks_cluster.yml
@@ -1,9 +1,9 @@
-name: Deallocate AKS VMSS
+name: Stop AKS Cluster
 
 on:
   repository_dispatch:
     # name of GitHub event that triggers workflow
-    types: [deallocate_vmss]
+    types: [stop_aks_cluster]
 
   # enable manual workflow
   # https://docs.github.com/en/actions/configuring-and-managing-workflows/configuring-a-workflow#manually-running-a-workflow
@@ -25,7 +25,7 @@ env:
   ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
 
 jobs:
-  deallocate_vmss:
+  stop_aks_cluster:
     runs-on: ubuntu-18.04
 
     # only run if owner triggered action
@@ -45,25 +45,19 @@ jobs:
           chmod -R +x ./scripts/
           echo "AKS_RG_NAME=${{ env.PREFIX }}-rg-aks-dev-001" >> $GITHUB_ENV
           echo "AKS_CLUSTER_NAME=${{ env.PREFIX }}-aks-001" >> $GITHUB_ENV
-      # Show event info
-      - name: Show triggered event data
-        run: pwsh -command "./scripts/Get-EventData.ps1"
-        env:
-          GITHUB_CONTEXT: ${{ toJson(github) }}
 
       # Login
       - name: Login to Azure
         run: ./scripts/azure_login.sh
 
-      # Deallocate
-      - name: Deallocate AKS VMSS
-        run: ./scripts/deallocate_aks_vmss.sh
+      # Stop
+      # Prereqs: https://docs.microsoft.com/en-us/azure/aks/start-stop-cluster
+      - name: Stop AKS Cluster
+        run: ./scripts/stop_aks_cluster.sh
 
       # Notify
       - name: Notify slack
         env:
+          SLACK_CHANNEL_ID: ${{ secrets.SLACK_CHANNEL_ID }}
           SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-        uses: pullreminders/slack-action@v1.0.9
-        with:
-          # env var concatenation not supported atm, so hard-code messages
-          args: '{\"channel\":\"C012ZQHT9A4\",\"text\":\"[aks-nexus-velero] Deallocate AKS VMSS complete\"}'
+        run: ./scripts/send_slack_message.sh "[aks-nexus-velero] Stop AKS Cluster complete"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 924fc7d..c8fbc17 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -84,8 +84,6 @@ jobs:
       # Notify
       - name: Notify slack
         env:
+          SLACK_CHANNEL_ID: ${{ secrets.SLACK_CHANNEL_ID }}
           SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-        uses: pullreminders/slack-action@v1.0.9
-        with:
-          # env var concatenation not supported atm, so hard-code messages
-          args: '{\"channel\":\"C012ZQHT9A4\",\"text\":\"[aks-nexus-velero] Test notification\"}'
+        run: ./scripts/send_slack_message.sh "[aks-nexus-velero] Test notification"
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
index f0e1763..856af47 100644
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@@ -4,12 +4,11 @@
     {
       "type": "func",
       "command": "host start",
-      "problemMatcher": "$func-watch",
+      "problemMatcher": "$func-powershell-watch",
       "isBackground": true,
       "options": {
         "cwd": "${workspaceFolder}/function_app"
-      },
-      "label": "func host start"
+      }
     }
   ]
 }
diff --git a/ansible/roles/helm_repo/tasks/main.yml b/ansible/roles/helm_repo/tasks/main.yml
index 8a1e648..7252a4c 100644
--- a/ansible/roles/helm_repo/tasks/main.yml
+++ b/ansible/roles/helm_repo/tasks/main.yml
@@ -14,7 +14,7 @@
   when: enable_debug_output == "true"
 
 - name: Add stable Helm Chart
-  shell: "helm repo add stable https://kubernetes-charts.storage.googleapis.com/"
+  shell: "helm repo add stable https://charts.helm.sh/stable/"
 
 - name: Download Example Helm Charts from GitHub
   shell: "helm pull stable/{{ item.name }} --version {{ item.version }}"
diff --git a/ansible/roles/init/tasks/main.yml b/ansible/roles/init/tasks/main.yml
index 41d2e37..bcebd5b 100644
--- a/ansible/roles/init/tasks/main.yml
+++ b/ansible/roles/init/tasks/main.yml
@@ -28,8 +28,8 @@
     register: result
     until:
       - result.status == 200
-    # 90 x 10secs = 15mins
-    retries: 90
+    # 30 x 10secs = 5mins
+    retries: 30
     delay: 10
   # reset module defaults for this simple check
   module_defaults:
diff --git a/function_app/TimerTrigger1/readme.md b/function_app/TimerTrigger1/readme.md
index aecd48e..fa4456c 100644
--- a/function_app/TimerTrigger1/readme.md
+++ b/function_app/TimerTrigger1/readme.md
@@ -8,6 +8,21 @@ For a `TimerTrigger` to work, you provide a schedule in the form of a [cron expr
 
 ## Local Testing
 
+Ensure you have a `local.settings.json` file with the following settings (replacing the placeholders):
+
+```json
+{
+  "IsEncrypted": false,
+  "Values": {
+    "AzureWebJobsStorage": "",
+    "FUNCTIONS_WORKER_RUNTIME": "powershell",
+    "FUNCTIONS_WORKER_RUNTIME_VERSION": "~7",
+    "IFTTT_WEBHOOK_KEY": "",
+    "WEEKDAY_ALLOWED_TIME_RANGE": "06:30 -> 08:00"
+  }
+}
+```
+
 Full instructions here: https://docs.microsoft.com/en-us/azure/azure-functions/functions-develop-vs-code?tabs=csharp#run-functions-locally
 
 Ensure the [Azure Functions Core Tools are installed](https://docs.microsoft.com/en-us/azure/azure-functions/functions-run-local?tabs=windows%2Ccsharp%2Cbash#install-the-azure-functions-core-tools),
@@ -22,7 +37,7 @@ then follow the steps below to run and test the code locally:
 
 ```powershell
-<# set to use PowerShell 7
+<# [NO LONGER REQUIRED?] set to use PowerShell 7
 $env:FUNCTIONS_WORKER_RUNTIME_VERSION = '~7'
 #>
 
 # enter function directory
diff --git a/function_app/host.json b/function_app/host.json
index da7f5ab..c1437cb 100644
--- a/function_app/host.json
+++ b/function_app/host.json
@@ -1,6 +1,13 @@
-// https://docs.microsoft.com/en-us/azure/azure-functions/functions-host-json
 {
   "version": "2.0",
+  "logging": {
+    "applicationInsights": {
+      "samplingSettings": {
+        "isEnabled": true,
+        "excludedTypes": "Request"
+      }
+    }
+  },
   "extensionBundle": {
     "id": "Microsoft.Azure.Functions.ExtensionBundle",
     "version": "[1.*, 2.0.0)"
diff --git a/function_app/profile.ps1 b/function_app/profile.ps1
index 2892cd9..5b999c8 100644
--- a/function_app/profile.ps1
+++ b/function_app/profile.ps1
@@ -13,11 +13,13 @@
 # Remove this if you are not planning on using MSI or Azure PowerShell.
 if ($env:MSI_SECRET -and (Get-Module -ListAvailable Az.Accounts)) {
     Write-Output "Authenticating PowerShell using Managed Identity..."
+    # Disable-AzContextAutosave -Scope Process | Out-Null
     Connect-AzAccount -Identity
 } elseif ($env:ARM_TENANT_ID -and $env:ARM_SUBSCRIPTION_ID -and $env:ARM_CLIENT_ID -and $env:ARM_CLIENT_SECRET) {
     # This is used for local development
     Write-Output "Authenticating PowerShell session using env vars..."
+    # Disable-AzContextAutosave -Scope Process | Out-Null
     $servicePrincipleCredential = [pscredential]::new($env:ARM_CLIENT_ID, (ConvertTo-SecureString $env:ARM_CLIENT_SECRET -AsPlainText -Force))
     Connect-AzAccount -ServicePrincipal -Tenant $env:ARM_TENANT_ID -Credential $servicePrincipleCredential -Subscription $env:ARM_SUBSCRIPTION_ID -Verbose
 }
diff --git a/function_app/requirements.psd1 b/function_app/requirements.psd1
index ac743d8..35f155b 100644
--- a/function_app/requirements.psd1
+++ b/function_app/requirements.psd1
@@ -2,8 +2,8 @@
 # See https://aka.ms/functionsmanageddependency for additional information.
 #
 @{
-    # 'Az' = '4.*'
+    # 'Az' = '5.*'
     # Only need Account and Compute cmdlets for VMSS and VM status checks
-    'Az.Accounts' = '1.*'
+    'Az.Accounts' = '2.*'
     'Az.Compute'  = '4.*'
 }
diff --git a/scripts/k8s_manifests_apply.sh b/scripts/k8s_manifests_apply.sh
index bc64610..4d67f68 100644
--- a/scripts/k8s_manifests_apply.sh
+++ b/scripts/k8s_manifests_apply.sh
@@ -22,10 +22,10 @@ echo "FINISHED: $message."
 # Testing kubectl
 kubectl version --short
 
-# Apply manifests
-message="Applying Kubernetes manifests"
-echo "STARTED: $message..."
+# # Apply manifests
+# message="Applying Kubernetes manifests"
+# echo "STARTED: $message..."
 
-# external-dns
-kubectl apply -n ingress -f ./manifests/external-dns.yml
-echo "FINISHED: $message."
+# # external-dns
+# kubectl apply -n ingress -f ./manifests/external-dns.yml
+# echo "FINISHED: $message."
diff --git a/scripts/send_slack_message.sh b/scripts/send_slack_message.sh
new file mode 100644
index 0000000..ed5b3d9
--- /dev/null
+++ b/scripts/send_slack_message.sh
@@ -0,0 +1,42 @@
+#! /usr/bin/env bash
+#
+# Posts a message to a Slack channel
+#
+# Usage:
+# export SLACK_CHANNEL_ID=""
+# export SLACK_BOT_TOKEN=""
+# ./send_slack_message.sh "This is a test message"
+#
+# Configure Bot User OAuth Access Token here:
+# https://api.slack.com/apps//oauth?
+
+# Ensure strict mode and predictable pipeline failure
+set -euo pipefail
+trap "echo 'error: Script failed: see failed command above'" ERR
+
+# Check vars (use "${VAR:-}" so unset vars fail these checks cleanly under "set -u")
+if test -z "${SLACK_CHANNEL_ID:-}"; then
+    echo "SLACK_CHANNEL_ID variable is missing, please set and try again."
+    exit 1
+fi
+
+if test -z "${SLACK_BOT_TOKEN:-}"; then
+    echo "SLACK_BOT_TOKEN variable is missing, please set and try again."
+    exit 1
+fi
+
+if test -z "${1:-}"; then
+    echo "No argument supplied for Slack message, please pass a message and try again."
+    exit 1
+fi
+
+# Set message JSON data
+http_post_data="{\"channel\":\"$SLACK_CHANNEL_ID\",\"text\":\"$1\"}"
+
+# Send message to Slack API
+curl --request POST \
+    --header "Content-type: application/json" \
+    --header "Authorization: Bearer $SLACK_BOT_TOKEN" \
+    --data "$http_post_data" \
+    --silent --output /dev/null --show-error --fail \
+    https://slack.com/api/chat.postMessage
diff --git a/scripts/start_aks_cluster.sh b/scripts/start_aks_cluster.sh
new file mode 100644
index 0000000..d7f7a6b
--- /dev/null
+++ b/scripts/start_aks_cluster.sh
@@ -0,0 +1,28 @@
+#! /usr/bin/env bash
+#
+# Starts AKS Cluster
+#
+# ensure strict mode and predictable pipeline failure
+set -euo pipefail
+trap "echo 'error: Script failed: see failed command above'" ERR
+
+# Prereqs as this is a preview feature: https://docs.microsoft.com/en-us/azure/aks/start-stop-cluster
+# Install the aks-preview extension
+az extension add --name aks-preview
+
+# Update the extension to make sure you have the latest version installed
+az extension update --name aks-preview
+
+# Check AKS power state
+aks_power_state=$(az aks show --name "$AKS_CLUSTER_NAME" --resource-group "$AKS_RG_NAME" --output tsv --query 'powerState.code')
+echo -e "\n[$AKS_CLUSTER_NAME] AKS Cluster power state is [$aks_power_state]."
+
+if [ "$aks_power_state" == "Running" ]; then
+    echo -e "\nSKIPPING: $AKS_CLUSTER_NAME AKS Cluster is already running."
+else
+    # Start AKS Cluster
+    message="Starting AKS Cluster: [$AKS_CLUSTER_NAME]"
+    echo -e "\nSTARTED: $message..."
+    az aks start --name "$AKS_CLUSTER_NAME" --resource-group "$AKS_RG_NAME"
+    echo -e "FINISHED: $message."
+fi
diff --git a/scripts/stop_aks_cluster.sh b/scripts/stop_aks_cluster.sh
new file mode 100644
index 0000000..83bd3e5
--- /dev/null
+++ b/scripts/stop_aks_cluster.sh
@@ -0,0 +1,28 @@
+#! /usr/bin/env bash
+#
+# Stops AKS Cluster
+#
+# ensure strict mode and predictable pipeline failure
+set -euo pipefail
+trap "echo 'error: Script failed: see failed command above'" ERR
+
+# Prereqs as this is a preview feature: https://docs.microsoft.com/en-us/azure/aks/start-stop-cluster
+# Install the aks-preview extension
+az extension add --name aks-preview
+
+# Update the extension to make sure you have the latest version installed
+az extension update --name aks-preview
+
+# Check AKS power state
+aks_power_state=$(az aks show --name "$AKS_CLUSTER_NAME" --resource-group "$AKS_RG_NAME" --output tsv --query 'powerState.code')
+echo -e "\n[$AKS_CLUSTER_NAME] AKS Cluster power state is [$aks_power_state]."
+
+if [ "$aks_power_state" == "Stopped" ]; then
+    echo -e "\nSKIPPING: $AKS_CLUSTER_NAME AKS Cluster is already stopped."
+else
+    # Stop AKS Cluster
+    message="Stopping AKS Cluster: [$AKS_CLUSTER_NAME]"
+    echo -e "\nSTARTED: $message..."
+    az aks stop --name "$AKS_CLUSTER_NAME" --resource-group "$AKS_RG_NAME"
+    echo -e "FINISHED: $message."
+fi
diff --git a/terraform/dns.tf b/terraform/dns.tf
index a5ebed0..438ecf8 100644
--- a/terraform/dns.tf
+++ b/terraform/dns.tf
@@ -1,6 +1,6 @@
 # DNS
 data "azurerm_resource_group" "dns" {
-  name     = var.dns_resource_group_name
+  name = var.dns_resource_group_name
 }
 
 data "azurerm_dns_zone" "dns" {
@@ -8,74 +8,126 @@ data "azurerm_dns_zone" "dns" {
   resource_group_name = data.azurerm_resource_group.dns.name
 }
 
-
-# Service Principle for external-dns k8s deployment
-resource "azuread_application" "aks_dns_sp" {
-  name = var.dns_service_principle_name
-}
-
-resource "azuread_service_principal" "aks_dns_sp" {
-  application_id = azuread_application.aks_dns_sp.application_id
-}
-
-resource "random_string" "aks_dns_sp" {
-  length  = 16
-  special = true
-  keepers = {
-    service_principal = azuread_service_principal.aks_dns_sp.id
-  }
-}
-
-resource "azuread_service_principal_password" "aks_dns_sp" {
-  service_principal_id = azuread_service_principal.aks_dns_sp.id
-  value                = random_string.aks_dns_sp.result
-  end_date_relative    = "8760h" # 8760h = 1 year
-
-  lifecycle {
-    ignore_changes = [end_date]
-  }
+# external-dns managed identity
+resource "azurerm_user_assigned_identity" "external_dns" {
+  resource_group_name = azurerm_kubernetes_cluster.aks.node_resource_group
+  location            = azurerm_kubernetes_cluster.aks.location
+  name                = "mi-external-dns"
 }
 
-
-# Service Principle role assignments
 # reader on dns resource group
-resource "azurerm_role_assignment" "aks_dns_sp_to_rg" {
-  principal_id                     = azuread_service_principal.aks_dns_sp.id
+resource "azurerm_role_assignment" "aks_dns_mi_to_rg" {
+  principal_id                     = azurerm_user_assigned_identity.external_dns.principal_id
   role_definition_name             = "Reader"
   scope                            = data.azurerm_dns_zone.dns.id
   skip_service_principal_aad_check = true
-  depends_on                       = [azuread_service_principal_password.aks_dns_sp]
 }
 
 # contributor on dns zone
-resource "azurerm_role_assignment" "aks_dns_sp_to_zone" {
-  principal_id                     = azuread_service_principal.aks_dns_sp.id
+resource "azurerm_role_assignment" "aks_dns_mi_to_zone" {
+  principal_id                     = azurerm_user_assigned_identity.external_dns.principal_id
   role_definition_name             = "Contributor"
   scope                            = data.azurerm_resource_group.dns.id
   skip_service_principal_aad_check = true
-  depends_on                       = [azuread_service_principal_password.aks_dns_sp]
 }
 
-# Kuberenetes Secret for external-dns
-resource "kubernetes_secret" "external_dns" {
+resource "kubernetes_namespace" "external_dns" {
   metadata {
-    name      = "azure-config-file"
-    namespace = "ingress"
+    name = "external-dns"
+  }
+  timeouts {
+    delete = "15m"
   }
-  data = {
-    "azure.json" = </
diff --git a/terraform/helm_aad_pod_identity.tf b/terraform/helm_aad_pod_identity.tf
index d36e336..71e35ca 100644
--- a/terraform/helm_aad_pod_identity.tf
+++ b/terraform/helm_aad_pod_identity.tf
@@ -3,7 +3,6 @@
 # role assignment for aad-pod-identity
 # https://azure.github.io/aad-pod-identity/docs/getting-started/role-assignment/#performing-role-assignments
 resource "azurerm_role_assignment" "aks_mi_aks_node_rg_vm_contributor" {
-  count                            = var.velero_enabled ? 1 : 0
   principal_id                     = azurerm_kubernetes_cluster.aks.kubelet_identity[0].object_id
   role_definition_name             = "Virtual Machine Contributor"
   scope                            = data.azurerm_resource_group.aks_node_rg.id
@@ -11,24 +10,13 @@ resource "azurerm_role_assignment" "aks_mi_aks_node_rg_vm_contributor" {
 }
 
 resource "azurerm_role_assignment" "aks_mi_aks_node_rg_mi_operator" {
-  count                            = var.velero_enabled ? 1 : 0
   principal_id                     = azurerm_kubernetes_cluster.aks.kubelet_identity[0].object_id
   role_definition_name             = "Managed Identity Operator"
   scope                            = data.azurerm_resource_group.aks_node_rg.id
   skip_service_principal_aad_check = true
 }
 
-# velero user MI in different RG, so assign role there too
-resource "azurerm_role_assignment" "aks_mi_velero_rg_mi_operator" {
-  count                            = var.velero_enabled ? 1 : 0
-  principal_id                     = azurerm_kubernetes_cluster.aks.kubelet_identity[0].object_id
-  role_definition_name             = "Managed Identity Operator"
-  scope                            = azurerm_resource_group.velero[0].id
-  skip_service_principal_aad_check = true
-}
-
 data "template_file" "azureIdentities" {
-  count    = var.velero_enabled ? 1 : 0
   template = file("${path.module}/files/azureIdentities.yaml.tpl")
   vars = {
     resourceID = azurerm_user_assigned_identity.velero[0].id
@@ -38,7 +26,6 @@ data "template_file" "azureIdentities" {
 
 # https://www.terraform.io/docs/providers/kubernetes/r/namespace.html
 resource "kubernetes_namespace" "aad_pod_identity" {
-  count = var.velero_enabled ? 1 : 0
   metadata {
     name = "aad-pod-identity"
   }
@@ -51,7 +38,6 @@ resource "kubernetes_namespace" "aad_pod_identity" {
 
 # https://www.terraform.io/docs/providers/helm/r/release.html
 resource "helm_release" "aad_pod_identity" {
-  count     = var.velero_enabled ? 1 : 0
   chart     = "aad-pod-identity"
   name      = "aad-pod-identity"
   namespace = "aad-pod-identity"
@@ -60,7 +46,7 @@ resource "helm_release" "aad_pod_identity" {
 
   values = [
     file("helm/aad_pod_identity_values.yaml"),
-    data.template_file.azureIdentities[0].rendered
+    data.template_file.azureIdentities.rendered
   ]
 
   set {
@@ -68,6 +54,12 @@ resource "helm_release" "aad_pod_identity" {
     value = "true"
   }
 
+  # https://github.com/Azure/aad-pod-identity/wiki/Debugging#increasing-the-verbosity-of-the-logs
+  set {
+    name  = "mic.logVerbosity"
+    value = 6
+  }
+
   timeout = 600
 
-  depends_on = [kubernetes_namespace.aad_pod_identity[0]]
+  depends_on = [kubernetes_namespace.aad_pod_identity]
 }
diff --git a/terraform/helm_akv2k8s.tf b/terraform/helm_akv2k8s.tf
index 96dbf36..1560e08 100644
--- a/terraform/helm_akv2k8s.tf
+++ b/terraform/helm_akv2k8s.tf
@@ -41,8 +41,8 @@ resource "local_file" "kubeconfig" {
 resource "null_resource" "akv2k8s_crds" {
   triggers = {
     # always_run = "${timestamp()}"
-    akv2k8s_yaml_contents = filemd5(var.akv2k8s_yaml_path)
-    cert_sync_yaml_contents = filemd5(var.cert_sync_yaml_path)
+    akv2k8s_yaml_contents   = filemd5(var.akv2k8s_yaml_path)
+    cert_sync_yaml_contents = filemd5(var.cert_sync_yaml_path)
   }
 
   provisioner "local-exec" {
@@ -85,7 +85,11 @@ resource "null_resource" "akv2k8s_exceptions" {
     EOT
   }
 
-  depends_on = [local_file.kubeconfig, kubernetes_namespace.akv2k8s]
+  depends_on = [
+    local_file.kubeconfig,
+    kubernetes_namespace.akv2k8s,
+    helm_release.aad_pod_identity
+  ]
 }
 
 # https://www.terraform.io/docs/providers/helm/r/release.html
@@ -98,7 +102,7 @@ resource "helm_release" "akv2k8s" {
   version    = var.akv2k8s_chart_version
 
   set {
-    name  = "logLevel"
+    name  = "controller.logLevel"
     value = "debug"
   }
diff --git a/terraform/helm_nginx.tf b/terraform/helm_nginx.tf
index fe64308..713fc04 100644
--- a/terraform/helm_nginx.tf
+++ b/terraform/helm_nginx.tf
@@ -23,27 +23,3 @@ resource "helm_release" "nginx" {
   timeout = 600
   depends_on = [kubernetes_namespace.ingress]
 }
-
-# ? Removed as now using kubernetes external-dns
-# ? keeping for reference of dns update script usage
-# https://www.terraform.io/docs/provisioners/local-exec.html
-# resource "null_resource" "update_dns" {
-#   # triggers = {
-#   #   always_run = "${timestamp()}"
-#   # }
-
-#   provisioner "local-exec" {
-#     command = "./Update-Dns.ps1"
-#     environment = {
-#       aks_rg           = azurerm_kubernetes_cluster.aks.resource_group_name
-#       aks_cluster_name = azurerm_kubernetes_cluster.aks.name
-#       dns_domain_name  = var.dns_domain_name
-#       has_subdomain    = var.has_subdomain
-#       api_key          = var.api_key
-#       api_secret       = var.api_secret
-#     }
-#     interpreter = ["pwsh", "-NonInteractive", "-Command"]
-#     working_dir = "${path.module}/../scripts/"
-#   }
-#   depends_on = [helm_release.nginx]
-# }
diff --git a/terraform/providers.tf b/terraform/providers.tf
index 376a756..32ccfb2 100644
--- a/terraform/providers.tf
+++ b/terraform/providers.tf
@@ -30,7 +30,7 @@ terraform {
 # must include blank features block
 # https://github.com/terraform-providers/terraform-provider-azurerm/releases
 provider "azurerm" {
-  version = "2.32.0"
+  version = "2.34.0"
   features {}
 }
diff --git a/terraform/variables.tf b/terraform/variables.tf
index f69b3ec..bbdf248 100644
--- a/terraform/variables.tf
+++ b/terraform/variables.tf
@@ -17,12 +17,12 @@ variable "kubernetes_version" {
 # https://github.com/kubernetes/ingress-nginx/releases
 # https://github.com/kubernetes/ingress-nginx/blob/master/charts/ingress-nginx/Chart.yaml#L3
 variable "nginx_chart_version" {
-  default = "3.4.0"
+  default = "3.7.1"
 }
 
 # https://hub.helm.sh/charts/jetstack/cert-manager
 variable "cert_manager_chart_version" {
-  default = "v1.0.3"
+  default = "v1.0.4"
 }
 
 # https://github.com/vmware-tanzu/helm-charts/releases
@@ -44,13 +44,19 @@ variable "nexus_chart_version" {
 # https://github.com/SparebankenVest/helm-charts/tree/gh-pages/akv2k8s
 # https://github.com/SparebankenVest/public-helm-charts/blob/master/stable/akv2k8s/Chart.yaml#L5
 variable "akv2k8s_chart_version" {
-  default = "1.1.25"
+  default = "1.1.26"
 }
 
 # https://github.com/Azure/aad-pod-identity/blob/master/charts/aad-pod-identity/Chart.yaml#L4
 variable "aad_pod_identity_chart_version" {
   default = "2.0.2"
 }
+
+# https://bitnami.com/stack/external-dns/helm
+# https://github.com/bitnami/charts/blob/master/bitnami/external-dns/Chart.yaml#L3
+variable "external_dns_chart_version" {
+  default = "3.5.0"
+}
 
 #endregion Versions
@@ -137,15 +143,15 @@ variable "agent_pool_node_count" {
 }
 
 variable "agent_pool_enable_auto_scaling" {
-  default = true
+  default = false
 }
 
 variable "agent_pool_node_min_count" {
-  default = 1
+  default = null
 }
 
 variable "agent_pool_node_max_count" {
-  default = 3
+  default = null
 }
 
 variable "agent_pool_profile_name" {
@@ -207,10 +213,6 @@ variable "velero_backup_included_namespaces" {
 
 # DNS
-variable "dns_service_principle_name" {
-  default = "sp_external_dns"
-}
-
 variable "dns_resource_group_name" {
   default = "__DNS_RG_NAME__"
 }
@@ -219,38 +221,11 @@ variable "dns_zone_name" {
   default = "__ROOT_DOMAIN_NAME__"
 }
 
-# not currently used as zone defaults to these anyway
-variable "dns_name_servers" {
-  type = list(string)
-  default = [
-    "ns1-07.azure-dns.com.",
-    "ns2-07.azure-dns.net.",
-    "ns3-07.azure-dns.org.",
-    "ns4-07.azure-dns.info."
-  ]
+variable "azureidentity_external_dns_yaml_path" {
+  default = "files/azureIdentity-external-dns.yaml.tpl"
 }
 
-# ? Removed as now using kubernetes external-dns
-# ? keeping for reference of dns update script usage
-# # DNS update script vars
-# variable "dns_domain_name" {
-#   default = "__DNS_DOMAIN_NAME__"
-# }
-
-# variable "has_subdomain" {
-#   default = "__HAS_SUBDOMAIN__"
-# }
-
-# variable "api_key" {
-#   default = "__API_KEY__"
-# }
-
-# variable "api_secret" {
-#   default = "__API_SECRET__"
-# }
-
-
 # Function Apps
 
 variable "func_app_sas_expires_in_hours" {
@@ -288,7 +263,6 @@ variable "nexus_tls_secret_name" {
 
 # akv2k8s
 
-# TODO: is this CRD file required?
 variable "akv2k8s_yaml_path" {
   default = "files/AzureKeyVaultSecret.yaml"
 }
diff --git a/terraform/velero_mi_auth.tf b/terraform/velero_mi_auth.tf
index 52a99e7..a34c41c 100644
--- a/terraform/velero_mi_auth.tf
+++ b/terraform/velero_mi_auth.tf
@@ -1,10 +1,9 @@
 # velero managed identity auth
 resource "azurerm_user_assigned_identity" "velero" {
   count               = var.velero_enabled ? 1 : 0
-  resource_group_name = azurerm_resource_group.velero[0].name
-  location            = azurerm_resource_group.velero[0].location
-
-  name = "mi_velero"
+  resource_group_name = azurerm_kubernetes_cluster.aks.node_resource_group
+  location            = azurerm_kubernetes_cluster.aks.location
+  name                = "mi-velero"
 }
 
 # assign velero MI contributor rights to velero storage RG
diff --git a/velero/Deploy-Velero.ps1 b/velero/Deploy-Velero.ps1
index e076a92..1aedb76 100644
--- a/velero/Deploy-Velero.ps1
+++ b/velero/Deploy-Velero.ps1
@@ -108,7 +108,6 @@ if ($helmReleaseName -in $helmDeployedList.Releases.Name) {
     # # OPTION 2 - YAML file
     # https://github.com/vmware-tanzu/helm-charts/tree/master/charts/velero#option-2-yaml-file
     # still use '--set` for dynamic values
-    # TODO add splatting for dynamic usage of debug flags
     # https://stackoverflow.com/questions/52854092/how-to-use-powershell-splatting-for-azure-cli
     helm install vmware-tanzu/velero `
         --name velero `
diff --git a/velero/velero-values.yaml b/velero/velero-values.yaml
index cc42f9a..3f4f99b 100644
--- a/velero/velero-values.yaml
+++ b/velero/velero-values.yaml
@@ -89,10 +89,9 @@ configuration:
   # Additional provider-specific configuration. See link above
   # for details of required/optional fields for your provider.
   config: {}
-  # TODO: complete below same as "$backupResourceGroupName,storageAccount=$storageAccountName" in CLI
   # config:
-  #   resourceGroup: TODO
-  #   storageAccount: TODO
+  #   resourceGroup:
+  #   storageAccount:
   #   region:
   #   s3ForcePathStyle:
   #   s3Url:
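
Note: with the event types renamed above (`types: [start_aks_cluster]` / `types: [stop_aks_cluster]`), the start/stop workflows can also be triggered remotely via GitHub's repository_dispatch API. A minimal sketch, assuming GITHUB_TOKEN holds a personal access token with repo scope (the repo slug is taken from the workflow comments above; this snippet is illustrative, not part of the patch):

# Trigger the "Start AKS Cluster" workflow via a repository_dispatch event
curl --request POST \
    --header "Accept: application/vnd.github.v3+json" \
    --header "Authorization: token $GITHUB_TOKEN" \
    --data '{"event_type": "start_aks_cluster"}' \
    https://api.github.com/repos/adamrushuk/aks-nexus-velero/dispatches

# Use "stop_aks_cluster" as the event_type to trigger the stop workflow instead.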