hold startup until ConfigMaps are ready or die #1172

Merged: 1 commit, Dec 13, 2023
pkg/generator/ingress_translator.go (12 changes: 3 additions & 9 deletions)

@@ -106,6 +106,8 @@ func (translator *IngressTranslator) translateIngress(ctx context.Context, ingre
     externalTLSHosts := make([]*route.VirtualHost, 0, len(ingress.Spec.Rules))
     clusters := make([]*v3.Cluster, 0, len(ingress.Spec.Rules))
 
+    cfg := config.FromContext(ctx)
+
     for i, rule := range ingress.Spec.Rules {
         ruleName := fmt.Sprintf("(%s/%s).Rules[%d]", ingress.Namespace, ingress.Name, i)
 
@@ -194,17 +196,9 @@ func (translator *IngressTranslator) translateIngress(ctx context.Context, ingre
 
             var transportSocket *envoycorev3.TransportSocket
 
-            // This has to be "OrDefaults" because this path could be called before the informers are
-            // running when booting the controller up and prefilling the config before making it
-            // ready.
-            //
-            // TODO:
-            // Drop this configmap check - issues/968
+            // TODO: drop this configmap check - issues/968
             // We could determine whether system-internal-tls is enabled or disabled via the flag only,
             // but all conformance tests need to be updated to have the port name so we check the configmap as well.
-            //
-            // TODO: Or fetch configmap before the loop as per https://github.com/knative-sandbox/net-kourier/pull/959#discussion_r1048441513
-            cfg := config.FromContextOrDefaults(ctx)
 
             // As Ingress with RewriteHost points to ExternalService(kourier-internal), we don't enable TLS.
             if (cfg.Network.SystemInternalTLSEnabled() || httpsPortUsed) && httpPath.RewriteHost == "" {
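Side note on the change above: hoisting a single config.FromContext(ctx) above the loop (instead of calling config.FromContextOrDefaults per path) is only safe because startup now blocks until the ConfigMaps are loaded into ctx (see controller.go below). Here is a minimal sketch of the knative-style store accessors this relies on; it assumes the usual generated pattern, not net-kourier's exact file:

package config // sketch of the accessor pattern, not the real file

import "context"

type Config struct{ /* Kourier, Network, ... */ }

type cfgKey struct{}

// ToContext stashes a fully-loaded Config in the context; the new startup
// path guarantees this has happened before translation runs.
func ToContext(ctx context.Context, c *Config) context.Context {
	return context.WithValue(ctx, cfgKey{}, c)
}

// FromContext assumes ToContext already ran; the type assertion panics when
// the config was never stored, which is why startup must block first.
func FromContext(ctx context.Context) *Config {
	return ctx.Value(cfgKey{}).(*Config)
}

// FromContextOrDefaults tolerates a missing Config, which the old per-path
// lookup needed because translation could run before the watcher primed ctx.
func FromContextOrDefaults(ctx context.Context) *Config {
	if c, ok := ctx.Value(cfgKey{}).(*Config); ok {
		return c
	}
	return &Config{} // stand-in for built-in defaults
}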
pkg/reconciler/ingress/controller.go (52 changes: 50 additions & 2 deletions)
@@ -18,7 +18,10 @@ package ingress
 
 import (
     "context"
+    "fmt"
+    "log"
     "strings"
+    "time"
 
     v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
     xds "github.com/envoyproxy/go-control-plane/pkg/server/v3"
@@ -27,12 +30,13 @@ import (
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/types"
     "k8s.io/apimachinery/pkg/util/sets"
+    "k8s.io/apimachinery/pkg/util/wait"
     v1 "k8s.io/client-go/informers/core/v1"
     "k8s.io/client-go/tools/cache"
     "knative.dev/net-kourier/pkg/config"
     envoy "knative.dev/net-kourier/pkg/envoy/server"
     "knative.dev/net-kourier/pkg/generator"
-    rconfig "knative.dev/net-kourier/pkg/reconciler/ingress/config"
+    store "knative.dev/net-kourier/pkg/reconciler/ingress/config"
     "knative.dev/networking/pkg/apis/networking/v1alpha1"
     networkingClientSet "knative.dev/networking/pkg/client/clientset/versioned/typed/networking/v1alpha1"
     knativeclient "knative.dev/networking/pkg/client/injection/client"
@@ … @@
     "knative.dev/pkg/controller"
     "knative.dev/pkg/logging"
     "knative.dev/pkg/reconciler"
+    "knative.dev/pkg/system"
 )
 
 const (
@@ -96,7 +101,7 @@ func NewController(ctx context.Context, cmw configmap.Watcher) *controller.Impl
     resync := configmap.TypeFilter(configsToResync...)(func(string, interface{}) {
         impl.FilteredGlobalResync(isKourierIngress, ingressInformer.Informer())
     })
-    configStore := rconfig.NewStore(logger.Named("config-store"), resync)
+    configStore := store.NewStore(logger.Named("config-store"), resync)
     configStore.WatchConfigs(cmw)
     return controller.Options{
         ConfigStore: configStore,
@@ -204,6 +209,10 @@ func NewController(ctx context.Context, cmw configmap.Watcher) *controller.Impl
     }
     logger.Infof("Priming the config with %d ingresses", len(ingressesToSync))
 
+    // startupTranslator will read the configuration from ctx, so we need to wait until
+    // the ConfigMaps are present or die.
+    ctx = ensureCtxWithConfigOrDie(ctx)
+
     // The startup translator uses clients instead of listeners to correctly list all
     // resources at startup.
     startupTranslator := generator.NewIngressTranslator(
@@ -345,3 +354,42 @@ func getSecretInformer(ctx context.Context) v1.SecretInformer {
     untyped := ctx.Value(filteredFactory.LabelKey{}) // This should always be not nil and have exactly one selector
     return secretfilteredinformer.Get(ctx, untyped.([]string)[0])
 }
+
+func ensureCtxWithConfigOrDie(ctx context.Context) context.Context {
+    var err error
+    var cfg *store.Config
+    if pollErr := wait.PollUntilContextTimeout(ctx, time.Second, 60*time.Second, true, func(context.Context) (bool, error) {
Review thread on the wait.PollUntilContextTimeout call (a sketch of the suggested env-var override follows the diff):

skonto (Contributor): Do we want to make the timeout configurable, just in case someone has a slow environment?

Member (author): Hm, I don't think that's necessary. The wait only covers the window until all YAMLs are applied during installation (via kubectl or the operator). That should never take longer than 60 seconds, and if it does, the pod simply restarts and gets another 60s.

skonto (Contributor), Dec 13, 2023: I don't know what value is good enough, to be honest; 1m might suffice. Restarting is an option, but you probably don't want people to have to do that on a big, slow cluster (API server under pressure). If they know they have a slow cluster, they could set the timeout high enough (e.g. via an env var) and forget about it; otherwise they have to monitor the pod and restart it. Thinking out loud here.

+        cfg, err = getInitialConfig(ctx)
+        return err == nil, nil
+    }); pollErr != nil {
+        log.Fatalf("Timed out waiting for configuration in ConfigMaps: %v", err)
+    }
+
+    ctx = store.ToContext(ctx, cfg)
+    return ctx
+}
+
+func getInitialConfig(ctx context.Context) (*store.Config, error) {
+    networkCM, err := kubeclient.Get(ctx).CoreV1().ConfigMaps(system.Namespace()).Get(ctx, netconfig.ConfigMapName, metav1.GetOptions{})
+    if err != nil {
+        return nil, fmt.Errorf("failed to fetch network config: %w", err)
+    }
+    networkConfig, err := netconfig.NewConfigFromMap(networkCM.Data)
+    if err != nil {
+        return nil, fmt.Errorf("failed to construct network config: %w", err)
+    }
+
+    kourierCM, err := kubeclient.Get(ctx).CoreV1().ConfigMaps(system.Namespace()).Get(ctx, config.ConfigName, metav1.GetOptions{})
+    if err != nil {
+        return nil, fmt.Errorf("failed to fetch kourier config: %w", err)
+    }
+    kourierConfig, err := config.NewConfigFromMap(kourierCM.Data)
+    if err != nil {
+        return nil, fmt.Errorf("failed to construct kourier config: %w", err)
+    }
+
+    return &store.Config{
+        Kourier: kourierConfig,
+        Network: networkConfig,
+    }, nil
+}
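Follow-up sketch for the review thread above: one way the startup timeout could be made configurable, as skonto suggested. This is not part of the PR; the STARTUP_CONFIG_TIMEOUT variable name, the helper, and the 60s fallback are assumptions for illustration only.

package ingress // illustration; would sit next to ensureCtxWithConfigOrDie

import (
	"log"
	"os"
	"time"
)

// startupConfigTimeout returns how long ensureCtxWithConfigOrDie should poll for
// the ConfigMaps, defaulting to the current hard-coded 60s when the hypothetical
// STARTUP_CONFIG_TIMEOUT env var is unset or unparsable.
func startupConfigTimeout() time.Duration {
	const fallback = 60 * time.Second
	raw := os.Getenv("STARTUP_CONFIG_TIMEOUT")
	if raw == "" {
		return fallback
	}
	d, err := time.ParseDuration(raw) // accepts values like "90s" or "5m"
	if err != nil {
		log.Printf("ignoring invalid STARTUP_CONFIG_TIMEOUT %q, using %v: %v", raw, fallback, err)
		return fallback
	}
	return d
}

The poll above would then read wait.PollUntilContextTimeout(ctx, time.Second, startupConfigTimeout(), true, ...) instead of hard-coding 60*time.Second.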