Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure ContainerHealthy condition is set back to True #15503

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/reconciler/autoscaling/hpa/hpa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ func TestReconcile(t *testing.T) {
Name: "nop deletion reconcile",
// Test that with a DeletionTimestamp we do nothing.
Objects: []runtime.Object{
pa(testNamespace, testRevision, WithHPAClass, WithPADeletionTimestamp),
WithDeletionTimestamp(pa(testNamespace, testRevision, WithHPAClass)),
deploy(testNamespace, testRevision),
},
Key: key(testNamespace, testRevision),
Expand Down
9 changes: 8 additions & 1 deletion pkg/reconciler/revision/reconcile_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision)

// If a container keeps crashing (no active pods in the deployment although we want some)
if *deployment.Spec.Replicas > 0 && deployment.Status.AvailableReplicas == 0 {
pods, err := c.kubeclient.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector)})
pods, err := c.kubeclient.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{
LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector),
Limit: 1,
})
if err != nil {
logger.Errorw("Error getting pods", zap.Error(err))
return nil
Expand Down Expand Up @@ -117,6 +120,10 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision)
}
}

if deployment.Status.ReadyReplicas > 0 {
rev.Status.MarkContainerHealthyTrue()
}

return nil
}

Expand Down
79 changes: 77 additions & 2 deletions pkg/reconciler/revision/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
tracingconfig "knative.dev/pkg/tracing/config"
autoscalingv1alpha1 "knative.dev/serving/pkg/apis/autoscaling/v1alpha1"
defaultconfig "knative.dev/serving/pkg/apis/config"
"knative.dev/serving/pkg/apis/serving"
v1 "knative.dev/serving/pkg/apis/serving/v1"
"knative.dev/serving/pkg/autoscaler/config/autoscalerconfig"
servingclient "knative.dev/serving/pkg/client/injection/client"
Expand Down Expand Up @@ -544,7 +545,7 @@ func TestReconcile(t *testing.T) {
WithRoutingState(v1.RoutingStateActive, fc),
),
pa("foo", "pull-backoff", WithReachabilityReachable), // pa can't be ready since deployment times out.
pod(t, "foo", "pull-backoff", WithWaitingContainer("pull-backoff", "ImagePullBackoff", "can't pull it")),
pod(t, "foo", "pull-backoff", WithPodCondition(corev1.PodReady, corev1.ConditionFalse, "ContainersNotReady"), WithWaitingContainer("pull-backoff", "ImagePullBackoff", "can't pull it")),
timeoutDeploy(deploy(t, "foo", "pull-backoff"), "Timed out!"),
image("foo", "pull-backoff"),
},
Expand All @@ -570,7 +571,7 @@ func TestReconcile(t *testing.T) {
WithRoutingState(v1.RoutingStateActive, fc),
WithLogURL, allUnknownConditions, MarkActive),
pa("foo", "pod-error", WithReachabilityReachable), // PA can't be ready, since no traffic.
pod(t, "foo", "pod-error", WithFailingContainer("pod-error", 5, "I failed man!")),
pod(t, "foo", "pod-error", WithPodCondition(corev1.PodReady, corev1.ConditionFalse, "ContainersNotReady"), WithFailingContainer("pod-error", 5, "I failed man!")),
deploy(t, "foo", "pod-error"),
image("foo", "pod-error"),
},
Expand Down Expand Up @@ -744,6 +745,60 @@ func TestReconcile(t *testing.T) {
PodSpecPersistentVolumeClaim: defaultconfig.Enabled,
PodSpecPersistentVolumeWrite: defaultconfig.Enabled,
}}),
}, {
Name: "revision's ContainerHealthy turns back to True if the deployment is healthy",
Objects: []runtime.Object{
Revision("foo", "container-unhealthy",
WithLogURL,
MarkRevisionReady,
withDefaultContainerStatuses(),
WithRevisionLabel(serving.RoutingStateLabelKey, "active"),
MarkContainerHealthyFalse("ExitCode137"),
),
pa("foo", "container-unhealthy",
WithPASKSReady,
WithScaleTargetInitialized,
WithTraffic,
WithReachabilityReachable,
WithPAStatusService("something"),
),
readyDeploy(deploy(t, "foo", "container-unhealthy", withReplicas(1))),
image("foo", "container-unhealthy"),
},
Key: "foo/container-unhealthy",
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: Revision("foo", "container-unhealthy",
WithLogURL,
MarkRevisionReady,
withDefaultContainerStatuses(),
WithRevisionLabel(serving.RoutingStateLabelKey, "active"),
),
}},
WantEvents: []string{
Eventf(corev1.EventTypeNormal, "RevisionReady", "Revision becomes ready upon all resources being ready"),
},
}, {
Name: "revision's ContainerHealthy stays True even if the last Pod with restarts terminates",
Objects: []runtime.Object{
Revision("foo", "container-healthy",
WithLogURL,
MarkRevisionReady,
withDefaultContainerStatuses(),
WithRevisionLabel(serving.RoutingStateLabelKey, "active"),
MarkContainerHealthyTrue(),
),
pa("foo", "container-healthy",
WithPASKSReady,
WithScaleTargetInitialized,
WithTraffic,
WithReachabilityReachable,
WithPAStatusService("something"),
),
WithDeletionTimestamp(pod(t, "foo", "container-healthy", WithPodCondition(corev1.PodReady, corev1.ConditionFalse, "ContainersNotReady"), WithFailingContainer("crashed-at-some-point", 128, "OOMKilled"))),
readyDeploy(deploy(t, "foo", "container-healthy", withReplicas(0))),
image("foo", "container-healthy"),
},
Key: "foo/container-healthy",
}}

table.Test(t, MakeFactory(func(ctx context.Context, listers *Listers, _ configmap.Watcher) controller.Reconciler {
Expand Down Expand Up @@ -853,6 +908,19 @@ func allUnknownConditions(r *v1.Revision) {

type configOption func(*config.Config)

type deploymentOption func(*appsv1.Deployment)

// withReplicas configures the number of replicas on the Deployment
func withReplicas(replicas int32) deploymentOption {
return func(d *appsv1.Deployment) {
d.Spec.Replicas = &replicas
d.Status.AvailableReplicas = replicas
d.Status.ReadyReplicas = replicas
d.Status.Replicas = replicas
d.Status.UpdatedReplicas = replicas
}
}

func deploy(t *testing.T, namespace, name string, opts ...interface{}) *appsv1.Deployment {
t.Helper()
cfg := reconcilerTestConfig()
Expand All @@ -878,6 +946,13 @@ func deploy(t *testing.T, namespace, name string, opts ...interface{}) *appsv1.D
if err != nil {
t.Fatal("failed to create deployment")
}

for _, opt := range opts {
if deploymentOpt, ok := opt.(deploymentOption); ok {
deploymentOpt(deployment)
}
}

return deployment
}

Expand Down
26 changes: 23 additions & 3 deletions pkg/testing/functional.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,11 @@ func WithNoTraffic(reason, message string) PodAutoscalerOption {
}
}

// WithPADeletionTimestamp will set the DeletionTimestamp on the PodAutoscaler.
func WithPADeletionTimestamp(r *autoscalingv1alpha1.PodAutoscaler) {
// WithDeletionTimestamp will set the DeletionTimestamp on the object.
func WithDeletionTimestamp[T metav1.Object](obj T) T {
SaschaSchwarze0 marked this conversation as resolved.
Show resolved Hide resolved
t := metav1.NewTime(time.Unix(1e9, 0))
r.ObjectMeta.SetDeletionTimestamp(&t)
obj.SetDeletionTimestamp(&t)
return obj
}

// WithHPAClass updates the PA to add the hpa class annotation.
Expand Down Expand Up @@ -288,6 +289,25 @@ func WithEndpointsOwnersRemoved(eps *corev1.Endpoints) {
// PodOption enables further configuration of a Pod.
type PodOption func(*corev1.Pod)

// WithPodCondition sets a condition in the status
func WithPodCondition(conditionType corev1.PodConditionType, status corev1.ConditionStatus, reason string) PodOption {
return func(pod *corev1.Pod) {
for i, condition := range pod.Status.Conditions {
if condition.Type == conditionType {
pod.Status.Conditions[i].Status = status
pod.Status.Conditions[i].Reason = reason
return
}
}

pod.Status.Conditions = append(pod.Status.Conditions, corev1.PodCondition{
Type: conditionType,
Status: status,
Reason: reason,
})
}
}

// WithFailingContainer sets the .Status.ContainerStatuses on the pod to
// include a container named accordingly to fail with the given state.
func WithFailingContainer(name string, exitCode int, message string) PodOption {
Expand Down
15 changes: 15 additions & 0 deletions pkg/testing/v1/revision.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,27 @@ func MarkDeploying(reason string) RevisionOption {
}
}

// MarkContainerHealthyUnknown changes the ContainerHealthy condition to Unknown with the given reason
func MarkContainerHealthyUnknown(reason string) RevisionOption {
return func(r *v1.Revision) {
r.Status.MarkContainerHealthyUnknown(reason, "")
}
}

// MarkContainerHealthyFalse changes the ContainerHealthy condition to False with the given reason
func MarkContainerHealthyFalse(reason string) RevisionOption {
return func(r *v1.Revision) {
r.Status.MarkContainerHealthyFalse(reason, "")
}
}

// MarkContainerHealthyTrue changes the ContainerHealthy condition to True
func MarkContainerHealthyTrue() RevisionOption {
return func(r *v1.Revision) {
r.Status.MarkContainerHealthyTrue()
}
}

// MarkProgressDeadlineExceeded calls the method of the same name on the Revision
// with the message we expect the Revision Reconciler to pass.
func MarkProgressDeadlineExceeded(message string) RevisionOption {
Expand Down
Loading