Skip to content

Commit

Permalink
feat: optimize server/agent workload and shorten kube-vip lease
Browse files Browse the repository at this point in the history
  • Loading branch information
zifeo committed Jun 19, 2024
1 parent 2a7f723 commit cda7cf1
Show file tree
Hide file tree
Showing 9 changed files with 13 additions and 46 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ yourself `/etc/rancher/rke2/rke2.yaml` on server nodes.

```
# remove server url from rke2 config
vim /etc/rancher/rke2/config.yaml
sudo vim /etc/rancher/rke2/config.yaml
# ssh into one of the server nodes (see terraform output -json)
# restore s3 snapshot (see restore_cmd output of the terraform module):
sudo systemctl stop rke2-server
Expand Down
Binary file added agent.log.zip
Binary file not shown.
9 changes: 2 additions & 7 deletions manifests/velero.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ spec:
volumeMounts:
- mountPath: /target
name: plugins
nodeSelector:
node-role.kubernetes.io/master: "true"
tolerations:
- effect: NoExecute
key: CriticalAddonsOnly
Expand All @@ -38,7 +36,7 @@ spec:
memory: 128Mi
limits:
cpu: null
memory: null
memory: 256Mi
kubectl:
image:
repository: docker.io/bitnami/kubectl
Expand All @@ -53,7 +51,7 @@ spec:
bucket: ${bucket_velero}
config:
cloud: self
region: ${region}
region: ${region}
volumeSnapshotLocation:
- name: default
provider: csi
Expand All @@ -64,7 +62,6 @@ spec:
OS_APPLICATION_CREDENTIAL_SECRET: ${app_secret}
# for community.openstack.org/openstack (plain env vars do not work, yet they take precedence over clouds.yaml unless OS_CLOUD is set)
OS_CLOUD: self

credentials:
# for community.openstack.org/openstack
secretContents:
Expand All @@ -89,10 +86,8 @@ spec:
mountPath: /etc/openstack/clouds.yaml
readOnly: true
subPath: clouds.yaml

backupsEnabled: true
snapshotsEnabled: true

deployNodeAgent: true
nodeAgent:
podVolumePath: /var/lib/kubelet/pods
Expand Down
12 changes: 3 additions & 9 deletions node/cloud-init.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ write_files:
curl -sfL https://get.rke2.io | sh -
fi
%{ if is_server ~}
%{~ if is_first ~}
%{~ for k, v in manifests_files ~}
- path: /opt/rke2/manifests/${k}
permissions: "0600"
Expand Down Expand Up @@ -129,7 +128,6 @@ write_files:
/usr/local/bin/customize-chart.sh "$CHARTS_DIR/$patch_name" "$patch"
fi
done
%{~ endif ~}
- path: /etc/modules-load.d/ipvs.conf
permissions: "0644"
owner: root:root
Expand Down Expand Up @@ -175,11 +173,11 @@ write_files:
- name: vip_leasename
value: plndr-cp-lock
- name: vip_leaseduration
value: "15"
value: "5"
- name: vip_renewdeadline
value: "10"
value: "3"
- name: vip_retryperiod
value: "2"
value: "1"
- name: enable_node_labeling
value: "true"
- name: lb_enable
Expand Down Expand Up @@ -292,17 +290,13 @@ runcmd:
- until [ -d /var/lib/rancher/rke2/agent/pod-manifests/ ]; do echo "Waiting for $(hostname) static pods"; sleep 1; done
- mv -v /opt/rke2/kube-vip.yaml /var/lib/rancher/rke2/agent/pod-manifests/kube-vip.yaml
- ls /var/lib/rancher/rke2/agent/pod-manifests
%{~ if is_first ~}
- wget https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64.tar.gz -O - | tar xz && mv yq_linux_amd64 /usr/bin/yq
- until [ -d /var/lib/rancher/rke2/data/v*/charts ]; do echo "Waiting for $(hostname) charts data"; sleep 1; done
- /usr/local/bin/customize-charts.sh $(realpath /var/lib/rancher/rke2/data/v*/charts)
- until [ -d /var/lib/rancher/rke2/server/manifests ]; do echo "Waiting for $(hostname) manifests"; sleep 1; done
- /usr/local/bin/customize-charts.sh /var/lib/rancher/rke2/server/manifests
- mv -v /opt/rke2/manifests/*.yaml /var/lib/rancher/rke2/server/manifests
- ls /var/lib/rancher/rke2/server/manifests
%{~ else ~}
- for i in $(find /var/lib/rancher/rke2/server/manifests -type f); do cp -v $i $i.skip; done
%{~ endif ~}
- until systemctl is-active -q rke2-server.service; do echo "Waiting for $(hostname) rke2 to start"; sleep 3; journalctl -u rke2-server.service --since "3 second ago"; done
%{~ else ~}
- systemctl enable rke2-agent.service
Expand Down
4 changes: 0 additions & 4 deletions node/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ resource "openstack_compute_instance_v2" "instance" {
key_pair = var.keypair_name
config_drive = true

connection {
user = var.system_user
}

network {
port = openstack_networking_port_v2.port[count.index].id
}
Expand Down
13 changes: 7 additions & 6 deletions patches/rke2-coredns.yaml.tpl
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@

%{ if operator_replica > 1 }
nodeSelector:
node-role.kubernetes.io/master: "true"
resources:
requests:
cpu: "100m"
Expand All @@ -10,5 +6,10 @@ resources:
cpu: "100m" # because of autoscaler
memory: "128Mi"
autoscaler:
enabled: false
%{ endif }
min: ${operator_replica}
resources:
requests:
cpu: "20m"
memory: "10Mi"
limits:
memory: "10Mi"
7 changes: 0 additions & 7 deletions patches/rke2-metrics-server.yaml.tpl
Original file line number Diff line number Diff line change
@@ -1,7 +0,0 @@

nodeSelector:
node-role.kubernetes.io/master: "true"
tolerations:
- effect: NoExecute
key: CriticalAddonsOnly
operator: "Exists"
6 changes: 0 additions & 6 deletions patches/rke2-snapshot-controller.yaml.tpl
Original file line number Diff line number Diff line change
@@ -1,7 +1 @@

nodeSelector:
node-role.kubernetes.io/master: "true"
tolerations:
- effect: NoExecute
key: CriticalAddonsOnly
operator: "Exists"
6 changes: 0 additions & 6 deletions patches/rke2-snapshot-validation-webhook.yaml.tpl
Original file line number Diff line number Diff line change
@@ -1,7 +1 @@

nodeSelector:
node-role.kubernetes.io/master: "true"
tolerations:
- effect: NoExecute
key: CriticalAddonsOnly
operator: "Exists"

0 comments on commit cda7cf1

Please sign in to comment.