Skip to content

Commit

Permalink
GH-84 refactor DNSRecord conflict detection
Browse files Browse the repository at this point in the history
  • Loading branch information
maksymvavilov committed May 1, 2024
1 parent df607f6 commit 8a728b8
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 31 deletions.
48 changes: 34 additions & 14 deletions internal/controller/dnsrecord_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package controller

import (
"context"
"errors"
"fmt"
"strings"
"time"
Expand All @@ -45,7 +44,6 @@ import (

const (
DNSRecordFinalizer = "kuadrant.io/dns-record"
WriteCounterLimit = 20
validationRequeueVariance = 0.5
DefaultValidationDuration = time.Second * 5
)
Expand Down Expand Up @@ -275,6 +273,22 @@ func generationChanged(record *v1alpha1.DNSRecord) bool {
return record.Generation != record.Status.ObservedGeneration
}

// exponentialRequeueTime consumes the current time and doubles it until it reaches defaultRequeueTime
func exponentialRequeueTime(lastRequeueTime string) time.Duration {
lastRequeue, err := time.ParseDuration(lastRequeueTime)
// corrupted DNSRecord. This value naturally set only via time.Duration.String() call
if err != nil {
// default to the least confidence timeout
return validationRequeueTime
}
// double the duration. Return the max timeout if overshoot
newReqeueue := lastRequeue * 2
if newReqeueue > defaultRequeueTime {
return defaultRequeueTime
}
return newReqeueue
}

// setDNSRecordCondition adds or updates a given condition in the DNSRecord status..
func setDNSRecordCondition(dnsRecord *v1alpha1.DNSRecord, conditionType string, status metav1.ConditionStatus, reason, message string) {
cond := metav1.Condition{
Expand Down Expand Up @@ -382,23 +396,20 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp
return noRequeueDuration, err
}

dnsRecord.Status.ValidFor = defaultRequeueTime.String()
// uses .Status.ValidFor as the last requeue duration. Double it.
requeueTime := exponentialRequeueTime(dnsRecord.Status.ValidFor)
dnsRecord.Status.ValidFor = requeueTime.String()
dnsRecord.Status.QueuedAt = reconcileStart
if plan.Changes.HasChanges() {
// generation has not changed but there are changes.
// implies that they were overridden - bump write counter
if !generationChanged(dnsRecord) {
if dnsRecord.Status.WriteCounter < WriteCounterLimit {
dnsRecord.Status.WriteCounter++
wrtiteCounter.WithLabelValues(dnsRecord.Name, dnsRecord.Namespace).Inc()
logger.V(3).Info("Changes needed on the same generation of record")
} else {
err = errors.New("reached write limit to the DNS provider for the same generation of record")
logger.Error(err, "Giving up on trying to maintain desired state of the DNS record - changes are being overridden")
return noRequeueDuration, err
}
dnsRecord.Status.WriteCounter++
wrtiteCounter.WithLabelValues(dnsRecord.Name, dnsRecord.Namespace).Inc()
logger.V(3).Info("Changes needed on the same generation of record")
}
dnsRecord.Status.ValidFor = validationRequeueTime.String()
requeueTime = validationRequeueTime
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse, "AwaitingValidation", "Awaiting validation")
logger.Info("Applying changes")
err = registry.ApplyChanges(ctx, plan.Changes)
Expand All @@ -407,10 +418,19 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp
}
} else {
logger.Info("All records are already up to date")
// reset the valid for from randomized value to a fixed value once validation succeeds
if !meta.IsStatusConditionTrue(dnsRecord.Status.Conditions, string(v1alpha1.ConditionTypeReady)) {
requeueTime = exponentialRequeueTime(validationRequeueTime.String())
dnsRecord.Status.ValidFor = requeueTime.String()
}
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionTrue, "ProviderSuccess", "Provider ensured the dns record")
}

// reset the counter on the gen change regardless of having changes in the plan
if generationChanged(dnsRecord) {
dnsRecord.Status.WriteCounter = 0
wrtiteCounter.WithLabelValues(dnsRecord.Name, dnsRecord.Namespace).Set(0)
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionTrue, "ProviderSuccess", "Provider ensured the dns record")
}

return defaultRequeueTime, nil
return requeueTime, nil
}
53 changes: 37 additions & 16 deletions internal/controller/dnsrecord_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ var _ = Describe("DNSRecordReconciler", func() {

})

It("should increase write counter if fail to publish record or record is overridden", func() {
It("should detect a conflict and the resolution of a conflict", func() {
dnsRecord = &v1alpha1.DNSRecord{
ObjectMeta: metav1.ObjectMeta{
Name: "foo-record-1",
Expand All @@ -199,19 +199,7 @@ var _ = Describe("DNSRecordReconciler", func() {
ManagedZoneRef: &v1alpha1.ManagedZoneReference{
Name: managedZone.Name,
},
Endpoints: []*externaldnsendpoint.Endpoint{
{
DNSName: "foo.example.com",
Targets: []string{
"127.0.0.1",
},
RecordType: "A",
SetIdentifier: "",
RecordTTL: 60,
Labels: nil,
ProviderSpecific: nil,
},
},
Endpoints: getTestEndpoints(),
},
}
dnsRecord2 = &v1alpha1.DNSRecord{
Expand Down Expand Up @@ -273,7 +261,7 @@ var _ = Describe("DNSRecordReconciler", func() {
"ObservedGeneration": Equal(dnsRecord.Generation),
})),
)
g.Expect(dnsRecord.Status.WriteCounter).To(BeNumerically(">", int64(0)))
g.Expect(dnsRecord.Status.WriteCounter).To(BeNumerically(">", int64(1)))

err = k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord2), dnsRecord2)
g.Expect(err).NotTo(HaveOccurred())
Expand All @@ -286,7 +274,40 @@ var _ = Describe("DNSRecordReconciler", func() {
"ObservedGeneration": Equal(dnsRecord2.Generation),
})),
)
g.Expect(dnsRecord2.Status.WriteCounter).To(BeNumerically(">", int64(0)))
g.Expect(dnsRecord2.Status.WriteCounter).To(BeNumerically(">", int64(1)))
}, TestTimeoutLong, time.Second).Should(Succeed())

By("fixing conflict with dnsrecord " + dnsRecord2.Name + " with endpoint dnsName: `foo.example.com` and target: `127.0.0.1`")
Eventually(func(g Gomega) {
// refresh the second record CR
err := k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord2), dnsRecord2)
g.Expect(err).NotTo(HaveOccurred())
}, TestTimeoutShort, time.Second).Should(Succeed())
dnsRecord2.Spec.Endpoints = getTestEndpoints()
Expect(k8sClient.Update(ctx, dnsRecord2)).To(Succeed())

Eventually(func(g Gomega) {
err := k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord), dnsRecord)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(dnsRecord.Status.Conditions).To(
ContainElement(MatchFields(IgnoreExtras, Fields{
"Type": Equal(string(v1alpha1.ConditionTypeReady)),
"Status": Equal(metav1.ConditionTrue),
"Reason": Equal("ProviderSuccess"),
"Message": Equal("Provider ensured the dns record"),
})),
)

err = k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord2), dnsRecord2)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(dnsRecord2.Status.Conditions).To(
ContainElement(MatchFields(IgnoreExtras, Fields{
"Type": Equal(string(v1alpha1.ConditionTypeReady)),
"Status": Equal(metav1.ConditionTrue),
"Reason": Equal("ProviderSuccess"),
"Message": Equal("Provider ensured the dns record"),
})),
)
}, TestTimeoutLong, time.Second).Should(Succeed())
})

Expand Down
3 changes: 2 additions & 1 deletion internal/controller/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ import (
)

const (
TestTimeoutShort = time.Second * 5
TestTimeoutMedium = time.Second * 15
TestTimeoutLong = time.Second * 30
TestRetryIntervalMedium = time.Millisecond * 250
RequeueDuration = time.Second * 6
ValidityDuration = time.Second * 3
ValidityDuration = time.Second * 1
defaultNS = "default"
providerCredential = "secretname"
)
Expand Down

0 comments on commit 8a728b8

Please sign in to comment.