From 5303dc0003fe7870da97914668f9157cd9f98afd Mon Sep 17 00:00:00 2001 From: Sascha Schwarze Date: Fri, 19 Jul 2024 14:25:17 +0200 Subject: [PATCH] Add feature flag treat-pod-as-always-schedulable The feature flag allows to declare that Pods in the system will eventually all get scheduled and Revisions should therefore not be marked unschedulable Signed-off-by: Sascha Schwarze --- config/core/configmaps/features.yaml | 11 +++++- pkg/apis/config/features.go | 3 ++ pkg/apis/config/features_test.go | 29 +++++++++++++++ .../revision/reconcile_resources.go | 12 ++++--- pkg/reconciler/revision/table_test.go | 35 ++++++++++++++++++- 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/config/core/configmaps/features.yaml b/config/core/configmaps/features.yaml index 51830a89b6d5..8874697e35a7 100644 --- a/config/core/configmaps/features.yaml +++ b/config/core/configmaps/features.yaml @@ -22,7 +22,7 @@ metadata: app.kubernetes.io/component: controller app.kubernetes.io/version: devel annotations: - knative.dev/example-checksum: "63a13754" + knative.dev/example-checksum: "0ac06704" data: _example: |- ################################ @@ -242,3 +242,12 @@ data: # Default queue proxy resource requests and limits to good values for most cases if set. queueproxy.resource-defaults: "disabled" + + # treat-pod-as-always-schedulable can be used to define that Pods in the system will always be + # scheduled, and a Revision should not be marked unschedulable. + # Setting this to `enabled` makes sense if you have cluster-autoscaling set up for you cluster + # where unschedulable Pods trigger the addition of a new Node and are therefore a short and + # transient state. + # + # See https://github.com/knative/serving/issues/14862 + treat-pod-as-always-schedulable: "disabled" diff --git a/pkg/apis/config/features.go b/pkg/apis/config/features.go index 56db93419c74..1c514a3511af 100644 --- a/pkg/apis/config/features.go +++ b/pkg/apis/config/features.go @@ -109,6 +109,7 @@ func defaultFeaturesConfig() *Features { SecurePodDefaults: Disabled, TagHeaderBasedRouting: Disabled, AutoDetectHTTP2: Disabled, + TreatPodAsAlwaysSchedulable: Disabled, } } @@ -126,6 +127,7 @@ func NewFeaturesConfigFromMap(data map[string]string) (*Features, error) { asFlag("queueproxy.resource-defaults", &nc.QueueProxyResourceDefaults), asFlag("secure-pod-defaults", &nc.SecurePodDefaults), asFlag("tag-header-based-routing", &nc.TagHeaderBasedRouting), + asFlag("treat-pod-as-always-schedulable", &nc.TreatPodAsAlwaysSchedulable), asFlag(FeatureContainerSpecAddCapabilities, &nc.ContainerSpecAddCapabilities), asFlag(FeaturePodSpecAffinity, &nc.PodSpecAffinity), asFlag(FeaturePodSpecDNSConfig, &nc.PodSpecDNSConfig), @@ -191,6 +193,7 @@ type Features struct { SecurePodDefaults Flag TagHeaderBasedRouting Flag AutoDetectHTTP2 Flag + TreatPodAsAlwaysSchedulable Flag } // asFlag parses the value at key as a Flag into the target, if it exists. diff --git a/pkg/apis/config/features_test.go b/pkg/apis/config/features_test.go index 6a1762d892cf..479b1557bb48 100644 --- a/pkg/apis/config/features_test.go +++ b/pkg/apis/config/features_test.go @@ -80,6 +80,7 @@ func TestFeaturesConfiguration(t *testing.T) { SecurePodDefaults: Enabled, QueueProxyResourceDefaults: Enabled, TagHeaderBasedRouting: Enabled, + TreatPodAsAlwaysSchedulable: Enabled, }), data: map[string]string{ "multi-container": "Enabled", @@ -103,6 +104,7 @@ func TestFeaturesConfiguration(t *testing.T) { "secure-pod-defaults": "Enabled", "queueproxy.resource-defaults": "Enabled", "tag-header-based-routing": "Enabled", + "treat-pod-as-always-schedulable": "Enabled", }, }, { name: "multi-container Allowed", @@ -671,6 +673,33 @@ func TestFeaturesConfiguration(t *testing.T) { data: map[string]string{ "kubernetes.podspec-hostnetwork": "Disabled", }, + }, { + name: "treat-pod-as-always-schedulable Allowed", + wantErr: false, + wantFeatures: defaultWith(&Features{ + TreatPodAsAlwaysSchedulable: Allowed, + }), + data: map[string]string{ + "treat-pod-as-always-schedulable": "Allowed", + }, + }, { + name: "treat-pod-as-always-schedulable Enabled", + wantErr: false, + wantFeatures: defaultWith(&Features{ + TreatPodAsAlwaysSchedulable: Enabled, + }), + data: map[string]string{ + "treat-pod-as-always-schedulable": "Enabled", + }, + }, { + name: "treat-pod-as-always-schedulable Disabled", + wantErr: false, + wantFeatures: defaultWith(&Features{ + TreatPodAsAlwaysSchedulable: Disabled, + }), + data: map[string]string{ + "treat-pod-as-always-schedulable": "Disabled", + }, }} for _, tt := range configTests { diff --git a/pkg/reconciler/revision/reconcile_resources.go b/pkg/reconciler/revision/reconcile_resources.go index 3fa6f07d24fe..39939b1f88f5 100644 --- a/pkg/reconciler/revision/reconcile_resources.go +++ b/pkg/reconciler/revision/reconcile_resources.go @@ -36,6 +36,7 @@ import ( "knative.dev/pkg/kmp" "knative.dev/pkg/logging" "knative.dev/pkg/logging/logkey" + apicfg "knative.dev/serving/pkg/apis/config" v1 "knative.dev/serving/pkg/apis/serving/v1" "knative.dev/serving/pkg/networking" "knative.dev/serving/pkg/reconciler/revision/config" @@ -87,10 +88,13 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision) // Update the revision status if pod cannot be scheduled (possibly resource constraints) // If pod cannot be scheduled then we expect the container status to be empty. - for _, cond := range pod.Status.Conditions { - if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { - rev.Status.MarkResourcesAvailableFalse(cond.Reason, cond.Message) - break + treatPodAsAlwaysSchedulable := config.FromContext(ctx).Features.TreatPodAsAlwaysSchedulable + if treatPodAsAlwaysSchedulable != apicfg.Enabled { + for _, cond := range pod.Status.Conditions { + if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { + rev.Status.MarkResourcesAvailableFalse(cond.Reason, cond.Message) + break + } } } diff --git a/pkg/reconciler/revision/table_test.go b/pkg/reconciler/revision/table_test.go index 57f020700096..55beb6178537 100644 --- a/pkg/reconciler/revision/table_test.go +++ b/pkg/reconciler/revision/table_test.go @@ -615,6 +615,32 @@ func TestReconcile(t *testing.T) { Object: pa("foo", "pod-schedule-error", WithReachabilityUnreachable), }}, Key: "foo/pod-schedule-error", + }, { + Name: "surface no pod schedule errors if treat-pod-as-always-schedulable is enabled", + // Test the propagation of the scheduling errors of Pod into the + // revision is not happening when treat-pod-as-always-schedulable + // is enabled. + Objects: []runtime.Object{ + Revision("foo", "pod-no-schedule-error", + WithLogURL, + MarkActivating("Deploying", ""), + WithRoutingState(v1.RoutingStateActive, fc), + withDefaultContainerStatuses(), + MarkDeploying("Deploying"), + WithRevisionObservedGeneration(1), + MarkContainerHealthyUnknown("Deploying"), + ), + pa("foo", "pod-no-schedule-error", WithReachabilityReachable), // PA can't be ready, since no traffic. + pod(t, "foo", "pod-no-schedule-error", WithUnschedulableContainer("Insufficient energy", "Unschedulable")), + deploy(t, "foo", "pod-no-schedule-error"), + image("foo", "pod-no-schedule-error"), + }, + Ctx: defaultconfig.ToContext(context.Background(), &defaultconfig.Config{ + Features: &defaultconfig.Features{ + TreatPodAsAlwaysSchedulable: defaultconfig.Enabled, + }, + }), + Key: "foo/pod-no-schedule-error", }, { Name: "ready steady state", // Test the transition that Reconcile makes when Endpoints become ready on the @@ -758,11 +784,18 @@ func TestReconcile(t *testing.T) { resolver: &nopResolver{}, } + cfg := reconcilerTestConfig() + + apiCfgOverride := defaultconfig.FromContext(ctx) + if apiCfgOverride != nil { + cfg.Config.Features = apiCfgOverride.Features + } + return revisionreconciler.NewReconciler(ctx, logging.FromContext(ctx), servingclient.Get(ctx), listers.GetRevisionLister(), controller.GetEventRecorder(ctx), r, controller.Options{ ConfigStore: &testConfigStore{ - config: reconcilerTestConfig(), + config: cfg, }, }) }))