From 071c7f46bb66170a5253642c273cfbdfc83ef2bc Mon Sep 17 00:00:00 2001
From: Oriano Destefani
Date: Thu, 19 Sep 2024 18:56:53 +0200
Subject: [PATCH 1/4] add missing rook_mon and rook_mgr overwrites to
 helm-values to allow more flexible configuration. Make rook_storage optional
 and dynamically generate nodes based on the ceph-resource group; keep it
 backwards compatible with rook_storage_nodes, which is still used in the
 testbed.

Signed-off-by: Oriano de-Stefani
---
 roles/rook/defaults/main.yml                  | 73 ++++++++++++-------
 .../rook/templates/01-helm-values-all.yml.j2  | 48 ++++++++----
 2 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/roles/rook/defaults/main.yml b/roles/rook/defaults/main.yml
index ca3c5f97e..07f225dc7 100644
--- a/roles/rook/defaults/main.yml
+++ b/roles/rook/defaults/main.yml
@@ -299,6 +299,27 @@ rook_cephconfig: {}
 #   "osd.*":
 #     osd_max_scrubs: "10"
 
+rook_mon:
+  # Set the number of mons to be started. Generally recommended to be 3.
+  # For highest availability, an odd number of mons should be specified.
+  count: {{ rook_mon_count }}
+  # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
+  # Mons should only be allowed on the same node for test environments where data loss is acceptable.
+  allowMultiplePerNode: false
+
+rook_mgr:
+  # When higher availability of the mgr is needed, increase the count to 2.
+  # In that case, one mgr will be active and one in standby. When Ceph updates which
+  # mgr is active, Rook will update the mgr services to match the active mgr.
+  count: {{ rook_mgr_count }}
+  allowMultiplePerNode: false
+  modules:
+    # List of modules to optionally enable or disable.
+    # Note the "dashboard" and "monitoring" modules are already configured by other settings in the cluster CR.
+    # - name: rook
+    #   enabled: true
+    {{ rook_mgr_modules }}
+
 ##############################################
 ###
 ### Storage Configuration
@@ -316,35 +337,35 @@ rook_storage_config_encrypteddevice: "true"
 # define a device filter where to create OSDs
 rook_storage_devicefilter: ""
 # name nodes where to create OSDs
-rook_storage_nodes: []
+#rook_storage_nodes: [] #TODO: deprecate?
 #  - name: "testbed-node-0"
 #  - name: "testbed-node-1"
 #  - name: "testbed-node-2"
-rook_storage:
-  useAllNodes: "{{ rook_storage_useallnodes }}"
-  useAllDevices: "{{ rook_storage_usealldevices }}"
-  # deviceFilter:
-  config:
-    # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
-    # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
-    # databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
-    osdsPerDevice: "{{ rook_storage_config_osdsperdevice }}" # this value can be overridden at the node or device level
-    encryptedDevice: "{{ rook_storage_config_encrypteddevice }}" # the default value for this option is "false"
-  # # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
-  # # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
-  # nodes:
-  #   - name: "172.17.4.201"
-  #     devices: # specific devices to use for storage can be specified for each node
-  #       - name: "sdb"
-  #       - name: "nvme01" # multiple osds can be created on high performance devices
-  #         config:
-  #           osdsPerDevice: "5"
-  #       - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
-  #         config: # configuration can be specified at the node level which overrides the cluster level config
-  #   - name: "172.17.4.301"
-  #     deviceFilter: "^sd."
-  deviceFilter: "{{ rook_storage_devicefilter }}"
-  nodes: "{{ rook_storage_nodes }}"
+#rook_storage:
+#  useAllNodes: "{{ rook_storage_useallnodes }}"
+#  useAllDevices: "{{ rook_storage_usealldevices }}"
+#  # deviceFilter:
+#  config:
+#    # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
+#    # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
+#    # databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
+#    osdsPerDevice: "{{ rook_storage_config_osdsperdevice }}" # this value can be overridden at the node or device level
+#    encryptedDevice: "{{ rook_storage_config_encrypteddevice }}" # the default value for this option is "false"
+#  # # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
+#  # # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
+#  # nodes:
+#  #   - name: "172.17.4.201"
+#  #     devices: # specific devices to use for storage can be specified for each node
+#  #       - name: "sdb"
+#  #       - name: "nvme01" # multiple osds can be created on high performance devices
+#  #         config:
+#  #           osdsPerDevice: "5"
+#  #       - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
+#  #         config: # configuration can be specified at the node level which overrides the cluster level config
+#  #   - name: "172.17.4.301"
+#  #     deviceFilter: "^sd."
+#  deviceFilter: "{{ rook_storage_devicefilter }}"
+#  nodes: "{{ rook_storage_nodes }}"
 
 ##############################################
 ###

diff --git a/roles/rook/templates/01-helm-values-all.yml.j2 b/roles/rook/templates/01-helm-values-all.yml.j2
index 816ffdfff..14587064d 100644
--- a/roles/rook/templates/01-helm-values-all.yml.j2
+++ b/roles/rook/templates/01-helm-values-all.yml.j2
@@ -123,25 +123,27 @@ cephClusterSpec:
   upgradeOSDRequiresHealthyPGs: false
 
   mon:
+    {{ rook_mon }}
     # Set the number of mons to be started. Generally recommended to be 3.
     # For highest availability, an odd number of mons should be specified.
-    count: {{ rook_mon_count }}
+    # count: {{ rook_mon_count }}
     # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
     # Mons should only be allowed on the same node for test environments where data loss is acceptable.
-    allowMultiplePerNode: false
+    # allowMultiplePerNode: false
 
   mgr:
+    {{ rook_mgr }}
     # When higher availability of the mgr is needed, increase the count to 2.
     # In that case, one mgr will be active and one in standby. When Ceph updates which
     # mgr is active, Rook will update the mgr services to match the active mgr.
-    count: {{ rook_mgr_count }}
-    allowMultiplePerNode: false
-    modules:
+    # count: {{ rook_mgr_count }}
+    # allowMultiplePerNode: false
+    #modules:
       # List of modules to optionally enable or disable.
       # Note the "dashboard" and "monitoring" modules are already configured by other settings in the cluster CR.
       # - name: rook
       #   enabled: true
-    {{ rook_mgr_modules }}
+    # {{ rook_mgr_modules }}
 
   # enable the ceph dashboard for viewing cluster status
   dashboard:
@@ -355,16 +357,19 @@ cephClusterSpec:
 
   # cluster level storage configuration and selection
   storage:
+    {% if rook_storage %}
    {{ rook_storage }}
-    # useAllNodes: false
-    # useAllDevices: false
+    {% else %}
+    #otherwise use the following values
+    useAllNodes: "{{ rook_storage_useallnodes }}"
+    useAllDevices: "{{ rook_storage_usealldevices }}"
     # deviceFilter:
-    # config:
-    #   crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
-    #   metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
-    #   databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
-    #   osdsPerDevice: "1" # this value can be overridden at the node or device level
-    #   encryptedDevice: "true" # the default value for this option is "false"
+    config:
+      # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
+      # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
+      # databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
+      osdsPerDevice: "{{ rook_storage_config_osdsperdevice }}" # this value can be overridden at the node or device level
+      encryptedDevice: "{{ rook_storage_config_encrypteddevice }}" # the default value for this option is "false"
     # # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
     # # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
     # nodes:
@@ -378,8 +383,19 @@ cephClusterSpec:
     #     config: # configuration can be specified at the node level which overrides the cluster level config
     #   - name: "172.17.4.301"
     #     deviceFilter: "^sd."
-    # deviceFilter: "^sd."
-    # nodes: []
+    deviceFilter: "{{ rook_storage_devicefilter }}"
+    {% if rook_storage_nodes is defined %}
+    nodes: "{{ rook_storage_nodes }}"
+    {% else %}
+    nodes:
+    {% for host in groups['ceph-resource'] %}
+      - name: "{{ hostvars[host]['ansible_hostname'] }}"
+      {% if hostvars[host]['rook_storage_devicefilter'] is defined %}
+        deviceFilter: "{{ hostvars[host]['rook_storage_devicefilter'] }}"
+      {% endif %}
+    {% endfor %}
+    {% endif %}
+    {% endif %}
 
   # The section for configuring management of daemon disruptions during upgrade or fencing.
   disruptionManagement:

From 0aede7c76c36b254175cf52e17259cd736f4139a Mon Sep 17 00:00:00 2001
From: Oriano de-Stefani
Date: Thu, 19 Sep 2024 19:29:35 +0200
Subject: [PATCH 2/4] fix yamllint errors

Signed-off-by: Oriano de-Stefani
---
 roles/rook/defaults/main.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/roles/rook/defaults/main.yml b/roles/rook/defaults/main.yml
index 07f225dc7..809431d23 100644
--- a/roles/rook/defaults/main.yml
+++ b/roles/rook/defaults/main.yml
@@ -302,7 +302,7 @@ rook_cephconfig: {}
 rook_mon:
   # Set the number of mons to be started. Generally recommended to be 3.
   # For highest availability, an odd number of mons should be specified.
-  count: {{ rook_mon_count }}
+  count: "{{ rook_mon_count }}"
   # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
   # Mons should only be allowed on the same node for test environments where data loss is acceptable.
   allowMultiplePerNode: false
@@ -311,14 +311,14 @@ rook_mgr:
   # When higher availability of the mgr is needed, increase the count to 2.
   # In that case, one mgr will be active and one in standby. When Ceph updates which
   # mgr is active, Rook will update the mgr services to match the active mgr.
-  count: {{ rook_mgr_count }}
+  count: "{{ rook_mgr_count }}"
   allowMultiplePerNode: false
   modules:
     # List of modules to optionally enable or disable.
     # Note the "dashboard" and "monitoring" modules are already configured by other settings in the cluster CR.
     # - name: rook
     #   enabled: true
-    {{ rook_mgr_modules }}
+    "{{ rook_mgr_modules }}"
 
 ##############################################
 ###
@@ -337,11 +337,11 @@ rook_storage_config_encrypteddevice: "true"
 # define a device filter where to create OSDs
 rook_storage_devicefilter: ""
 # name nodes where to create OSDs
-#rook_storage_nodes: [] #TODO: deprecate?
+# rook_storage_nodes: [] #TODO: deprecate?
 #  - name: "testbed-node-0"
 #  - name: "testbed-node-1"
 #  - name: "testbed-node-2"
-#rook_storage:
+# rook_storage:
 #  useAllNodes: "{{ rook_storage_useallnodes }}"
 #  useAllDevices: "{{ rook_storage_usealldevices }}"
 #  # deviceFilter:

From bd06db6aae2fa42c4af30c26f2fd96983793102a Mon Sep 17 00:00:00 2001
From: Oriano de-Stefani
Date: Mon, 23 Sep 2024 15:02:41 +0200
Subject: [PATCH 3/4] fix j2 indentation and type casting errors

Signed-off-by: Oriano de-Stefani
---
 .../rook/templates/01-helm-values-all.yml.j2 | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/roles/rook/templates/01-helm-values-all.yml.j2 b/roles/rook/templates/01-helm-values-all.yml.j2
index 14587064d..e1fb2ac28 100644
--- a/roles/rook/templates/01-helm-values-all.yml.j2
+++ b/roles/rook/templates/01-helm-values-all.yml.j2
@@ -357,12 +357,12 @@ cephClusterSpec:
 
   # cluster level storage configuration and selection
   storage:
-    {% if rook_storage %}
+{% if rook_storage %}
    {{ rook_storage }}
-    {% else %}
+{% else %}
     #otherwise use the following values
-    useAllNodes: "{{ rook_storage_useallnodes }}"
-    useAllDevices: "{{ rook_storage_usealldevices }}"
+    useAllNodes: {{ rook_storage_useallnodes }}
+    useAllDevices: {{ rook_storage_usealldevices }}
     # deviceFilter:
     config:
@@ -384,18 +384,18 @@ cephClusterSpec:
     #   - name: "172.17.4.301"
     #     deviceFilter: "^sd."
     deviceFilter: "{{ rook_storage_devicefilter }}"
-    {% if rook_storage_nodes is defined %}
+{% if rook_storage_nodes is defined %}
     nodes: "{{ rook_storage_nodes }}"
-    {% else %}
+{% else %}
     nodes:
-    {% for host in groups['ceph-resource'] %}
+{% for host in groups['ceph-resource'] %}
       - name: "{{ hostvars[host]['ansible_hostname'] }}"
-      {% if hostvars[host]['rook_storage_devicefilter'] is defined %}
+{% if hostvars[host]['rook_storage_devicefilter'] is defined %}
        deviceFilter: "{{ hostvars[host]['rook_storage_devicefilter'] }}"
-      {% endif %}
-    {% endfor %}
-    {% endif %}
-    {% endif %}
+{% endif %}
+{% endfor %}
+{% endif %}
+{% endif %}
 
   # The section for configuring management of daemon disruptions during upgrade or fencing.
   disruptionManagement:

From 67acca342fe80f3eb19bd1aa773b8dd3c7b99bc3 Mon Sep 17 00:00:00 2001
From: Oriano de-Stefani
Date: Tue, 24 Sep 2024 15:19:27 +0200
Subject: [PATCH 4/4] fix swapped labels between mds and rgw

Signed-off-by: Oriano de-Stefani
---
 roles/rook/defaults/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/roles/rook/defaults/main.yml b/roles/rook/defaults/main.yml
index 809431d23..93b19f47b 100644
--- a/roles/rook/defaults/main.yml
+++ b/roles/rook/defaults/main.yml
@@ -156,7 +156,7 @@ rook_placement_cephobjectstore:
       requiredDuringSchedulingIgnoredDuringExecution:
         nodeSelectorTerms:
           - matchExpressions:
-              - key: "node-role.osism.tech/{{ rook_placement_label_mds }}"
+              - key: "node-role.osism.tech/{{ rook_placement_label_rgw }}"
                 operator: In
                 values:
                   - "true"
@@ -174,7 +174,7 @@ rook_placement_cephfilesystem:
       requiredDuringSchedulingIgnoredDuringExecution:
         nodeSelectorTerms:
           - matchExpressions:
-              - key: "node-role.osism.tech/{{ rook_placement_label_rgw }}"
+              - key: "node-role.osism.tech/{{ rook_placement_label_mds }}"
                 operator: In
                 values:
                   - "true"
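
Example (not part of the patch series): a minimal sketch of how the new rook_mon and rook_mgr overwrites could be used from an operator's configuration file, assuming the usual Ansible variable precedence over the role defaults. The file path and the concrete values (5 mons, 2 mgrs, the "rook" module) are illustrative only.

# environments/rook/configuration.yml (hypothetical path)
rook_mon:
  count: 5                      # replaces the rook_mon_count based default
  allowMultiplePerNode: false
rook_mgr:
  count: 2                      # one active mgr plus one standby
  allowMultiplePerNode: false
  modules:
    - name: rook                # example module toggle, not required by the role
      enabled: true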
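
Similarly, a sketch of per-node OSD device filters picked up by the new ceph-resource based node generation, assuming rook_storage stays commented out in the defaults (as this series leaves it) and rook_storage_nodes is not set, so the {% for host in groups['ceph-resource'] %} branch is rendered. Group membership, host names and filter patterns below are illustrative.

# inventory: hosts that should carry OSDs are members of the ceph-resource group
# [ceph-resource]
# testbed-node-0
# testbed-node-1

# host_vars/testbed-node-0.yml (hypothetical)
rook_storage_devicefilter: "^sd[b-d]"   # only sdb, sdc and sdd on this node become OSDs

# host_vars/testbed-node-1.yml (hypothetical)
rook_storage_devicefilter: "^nvme."     # NVMe devices only on this node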