Adjust Workload HA test iterations to fit within test timeout #15362

Merged · 2 commits · Jun 28, 2024
129 changes: 70 additions & 59 deletions test/ha/workload_test.go
@@ -23,79 +23,77 @@ import (
"context"
"strconv"
"testing"
"time"

corev1 "k8s.io/api/core/v1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
pkgTest "knative.dev/pkg/test"
"knative.dev/serving/pkg/apis/autoscaling"
"knative.dev/serving/pkg/apis/serving"
revnames "knative.dev/serving/pkg/reconciler/revision/resources/names"
rtesting "knative.dev/serving/pkg/testing/v1"
"knative.dev/serving/test"
"knative.dev/serving/test/e2e"
v1test "knative.dev/serving/test/v1"
)

const (
minimumNumberOfReplicas = 2
maximumNumberOfReplicas = 2
deploymentSuffix = "-deployment"
repetitionCount = 5
repetitionCount = 4
)

func TestActivatorNotInRequestPath(t *testing.T) {
t.Parallel()
clients := e2e.Setup(t)
ctx := context.Background()

// Create first service that we will continually probe during disruption scenario.
names, resources := createPizzaPlanetService(t,
rtesting.WithConfigAnnotations(map[string]string{
autoscaling.MinScaleAnnotationKey: strconv.Itoa(minimumNumberOfReplicas), // Make sure we don't scale to zero during the test.
autoscaling.MaxScaleAnnotationKey: strconv.Itoa(maximumNumberOfReplicas),
autoscaling.TargetBurstCapacityKey: "0", // The Activator is only added to the request path during scale from zero scenarios.
}),
)
test.EnsureTearDown(t, clients, &names)

testUptimeDuringUserPodDeletion(t, ctx, clients, names, resources)
}
func TestWorkloadHA(t *testing.T) {
const minReplicas = 2

func TestActivatorInRequestPathAlways(t *testing.T) {
t.Parallel()
clients := e2e.Setup(t)
ctx := context.Background()

// Create first service that we will continually probe during disruption scenario.
names, resources := createPizzaPlanetService(t,
rtesting.WithConfigAnnotations(map[string]string{
autoscaling.MinScaleAnnotationKey: strconv.Itoa(minimumNumberOfReplicas), // Make sure we don't scale to zero during the test.
autoscaling.MaxScaleAnnotationKey: strconv.Itoa(maximumNumberOfReplicas),
autoscaling.TargetBurstCapacityKey: "-1", // Make sure all requests go through the activator.
}),
)
test.EnsureTearDown(t, clients, &names)

testUptimeDuringUserPodDeletion(t, ctx, clients, names, resources)
}

func testUptimeDuringUserPodDeletion(t *testing.T, ctx context.Context, clients *test.Clients, names test.ResourceNames, resources *v1test.ResourceObjects) {
t.Log("Starting prober")
prober := test.NewProberManager(t.Logf, clients, minProbes, test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS))

prober.Spawn(resources.Service.Status.URL.URL())
defer assertSLO(t, prober, 1)

deploymentName := names.Revision + deploymentSuffix
if err := pkgTest.WaitForDeploymentScale(ctx, clients.KubeClient, deploymentName, test.ServingFlags.TestNamespace, minimumNumberOfReplicas); err != nil {
t.Fatalf("Deployment %s not scaled to %d: %v", deploymentName, minimumNumberOfReplicas, err)
}

for i := 0; i < repetitionCount; i++ {
deleteUserPods(t, ctx, clients, names.Service)
}

if err := pkgTest.WaitForDeploymentScale(ctx, clients.KubeClient, deploymentName, test.ServingFlags.TestNamespace, minimumNumberOfReplicas); err != nil {
t.Errorf("Deployment %s not scaled to %d: %v", deploymentName, minimumNumberOfReplicas, err)
cases := []struct {
name string
tbc string
}{{
name: "activator-in-path",
tbc: "-1",
}, {
name: "activator-not-in-path",
tbc: "0",
}}

for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
clients := e2e.Setup(t)
ctx := context.Background()

// Create the service that we will continually probe during the disruption scenario.
names, resources := createPizzaPlanetService(t,
rtesting.WithConfigAnnotations(map[string]string{
autoscaling.MinScaleAnnotationKey: strconv.Itoa(minReplicas),
autoscaling.MaxScaleAnnotationKey: strconv.Itoa(minReplicas),
autoscaling.TargetBurstCapacityKey: tc.tbc, // "-1" keeps the Activator in the request path for every request; "0" adds it only during scale from zero.
}),
)

test.EnsureTearDown(t, clients, &names)
t.Log("Starting prober")

deploymentName := revnames.Deployment(resources.Revision)
if err := pkgTest.WaitForDeploymentScale(ctx, clients.KubeClient, deploymentName, test.ServingFlags.TestNamespace, minReplicas); err != nil {
t.Fatalf("Deployment %s not scaled to %v: %v", deploymentName, minReplicas, err)
}

prober := test.NewProberManager(t.Logf, clients, minProbes, test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS))
prober.Spawn(resources.Service.Status.URL.URL())
defer assertSLO(t, prober, 1)

for i := 0; i < repetitionCount; i++ {
deleteUserPods(t, ctx, clients, names.Service)
}

if err := pkgTest.WaitForDeploymentScale(ctx, clients.KubeClient, deploymentName, test.ServingFlags.TestNamespace, minReplicas); err != nil {
t.Errorf("Deployment %s not scaled to %v: %v", deploymentName, minReplicas, err)
}
})
}
}
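
The two former top-level tests are consolidated above into a single table-driven test whose cases differ only in the TargetBurstCapacity annotation, with each case running as a parallel subtest. For readers less familiar with the idiom, here is a minimal, self-contained sketch of that pattern; the test and variable names are hypothetical and the Knative-specific helpers are deliberately omitted:

// A minimal sketch of the table-driven, parallel subtest pattern used above.
// Names here are hypothetical; the real test wires tc.tbc into the
// autoscaling.TargetBurstCapacityKey annotation on the Service.
package ha

import "testing"

func TestBurstCapacityModes(t *testing.T) {
	cases := []struct {
		name string
		tbc  string
	}{
		{name: "activator-in-path", tbc: "-1"},
		{name: "activator-not-in-path", tbc: "0"},
	}

	for _, tc := range cases {
		tc := tc // capture the loop variable for the parallel closure (needed before Go 1.22)
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel() // both modes run concurrently, keeping wall-clock time down
			_ = tc.tbc   // a real test would create a Service annotated with this value
		})
	}
}

A single mode can then be exercised in isolation with go test's -run filter, e.g. -run 'TestWorkloadHA/activator-in-path'.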

@@ -110,13 +108,26 @@ func deleteUserPods(t *testing.T, ctx context.Context, clients *test.Clients, se
}

t.Logf("Deleting user pods")
for _, pod := range pods.Items {
err := clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
for i, pod := range pods.Items {
t.Logf("Deleting pod %q (%v of %v)", pod.Name, i+1, len(pods.Items))
err := clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
if err != nil {
t.Fatalf("Unable to delete pod: %v", err)
}
if err := pkgTest.WaitForPodDeleted(context.Background(), clients.KubeClient, pod.Name, test.ServingFlags.TestNamespace); err != nil {

if err := pkgTest.WaitForPodState(ctx, clients.KubeClient, func(p *corev1.Pod) (bool, error) {
// Always return false. We're only interested in the error, which indicates pod deletion or a timeout.
t.Logf("%q still not deleted - %s", p.Name, time.Now().String())
return false, nil
}, pod.Name, pod.Namespace); err != nil {
if !apierrs.IsNotFound(err) {
t.Fatalf("expected pod to not be found")
}
}

if err := pkgTest.WaitForPodDeleted(context.Background(), clients.KubeClient, pod.Name, pod.Namespace); err != nil {
t.Fatalf("Did not observe %s to actually be deleted: %v", pod.Name, err)
}
t.Logf("Deleting pod %q finished", pod.Name)
}
}
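
The deletion loop waits twice for each pod: first via pkgTest.WaitForPodState with a callback that always returns false, so the wait only ends once the pod lookup fails with NotFound (or the helper times out), and then via pkgTest.WaitForPodDeleted as a final confirmation. The same "poll until the pod is gone" idea can be written with plain client-go and apimachinery; the sketch below is only an illustration of that idea, not the helper the test uses, and the polling interval and timeout are made-up values:

// Illustration only: polls until a GET for the pod returns NotFound, or the
// timeout expires. This mirrors the intent of the test's wait, not its exact
// implementation; the 1s interval and 2m timeout are hypothetical.
package ha

import (
	"context"
	"time"

	apierrs "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

func waitForPodGone(ctx context.Context, kube kubernetes.Interface, namespace, name string) error {
	return wait.PollUntilContextTimeout(ctx, 1*time.Second, 2*time.Minute, true,
		func(ctx context.Context) (bool, error) {
			_, err := kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
			if apierrs.IsNotFound(err) {
				return true, nil // the pod is gone; stop polling
			}
			// The pod still exists (or a transient error occurred): keep polling.
			return false, nil
		})
}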