-
-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathmain.yml
240 lines (201 loc) · 8.05 KB
/
main.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
---
# Automated setup of distributed Linpack benchmark.
#
# Inspired by: https://mikejmcfarlane.github.io/blog/2020/09/17/High-Performance-Linpack-for-raspberry-pi-supercomputer
# See also: https://www.sci-pi.org.uk/bench/linpack.html
- name: Install linpack benchmark.
  # Builds everything the HPL (High Performance Linpack) benchmark needs on
  # every host in the `cluster` group: distro build dependencies, MPICH,
  # the ATLAS linear algebra library, and HPL itself.
  #
  # `hpl_root` is expected to come from config.yml. Each long-running build is
  # guarded by a COMPILE_*_COMPLETE marker file under `hpl_root`/tmp, so
  # re-running the play skips steps that already finished.
  hosts: cluster
  become: false
  tags: ['setup']

  vars_files: ['config.yml']

  tasks:
    # Only the dependency file matching the host's OS family is included.
    - name: Install dependencies (RHEL-based).
      ansible.builtin.include_tasks: dependencies/rhel-based.yml
      when: ansible_os_family == 'RedHat'

    - name: Install dependencies (Debian-based).
      ansible.builtin.include_tasks: dependencies/debian-based.yml
      when: ansible_os_family == 'Debian'

    - name: Install dependencies (Arch-based).
      ansible.builtin.include_tasks: dependencies/arch-based.yml
      when: ansible_os_family == 'Archlinux'

    # Created with elevated privileges, but owned by the connecting user so
    # the unprivileged download/build tasks below can write into them.
    - name: Create required temporary directories.
      ansible.builtin.file:
        path: "{{ item }}"
        state: directory
        owner: "{{ ansible_user | default(ansible_env.USER, true) | default(ansible_user_id, true) }}"
        group: "{{ ansible_user | default(ansible_env.USER, true) | default(ansible_user_id, true) }}"
        mode: "0755"
      loop:
        - "{{ hpl_root }}/tmp"
        - "{{ hpl_root }}/tmp/atlas-build"
      become: true

    - name: Download MPI (Message Passing Interface).
      ansible.builtin.unarchive:
        src: https://www.mpich.org/static/downloads/3.4.2/mpich-3.4.2.tar.gz
        dest: "{{ hpl_root }}/tmp"
        remote_src: true
        creates: "{{ hpl_root }}/tmp/mpich-3.4.2/README"

    # Configure and compile as the regular user; both steps are skipped once
    # the COMPILE_MPI_COMPLETE marker exists.
    - name: Build MPI (takes a while).
      ansible.builtin.command: "{{ item }}"
      args:
        chdir: "{{ hpl_root }}/tmp/mpich-3.4.2"
        creates: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE"
      loop:
        - ./configure --with-device=ch3:sock FFLAGS=-fallow-argument-mismatch
        - "make -j{{ ansible_processor_nproc }}"

    - name: Install MPI.
      ansible.builtin.command: make install
      args:
        chdir: "{{ hpl_root }}/tmp/mpich-3.4.2"
        creates: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE"
      become: true

    # `preserve` keeps the timestamps of an existing marker, so this task
    # only reports a change the first time the marker is created.
    - name: Create 'COMPILE_MPI_COMPLETE' file.
      ansible.builtin.file:
        path: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE"
        state: touch
        mode: "0644"
        modification_time: preserve
        access_time: preserve

    # Probe only: a non-zero return code means there is no cpufreq directory
    # for cpu0, in which case the governor task below is skipped.
    - name: Test if we can set CPU scaling parameters.
      ansible.builtin.command: >-
        ls /sys/devices/system/cpu/cpu0/cpufreq
      failed_when: false
      changed_when: false
      register: cpufreq_exists

    # Note: There was no simpler way to do this besides `shell` (the glob
    # must be expanded). Privileges come from `become` rather than an inline
    # `sudo`, so the configured become method and password are honoured.
    - name: Ensure CPU scaling is set to 'performance'.
      ansible.builtin.shell: >-
        echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
      when: cpufreq_exists.rc == 0
      become: true

    - name: Download ATLAS linear algebra library.
      ansible.builtin.unarchive:
        src: https://sourceforge.net/projects/math-atlas/files/Stable/3.10.3/atlas3.10.3.tar.bz2
        dest: "{{ hpl_root }}/tmp"
        remote_src: true
        creates: "{{ hpl_root }}/tmp/ATLAS/README"

    # The source code for the configure script that ATLAS uses makes calls
    # with fgrep to find information about the system from its various
    # outputs to set the configuration accordingly. However,
    # grep versions 3.8+ throw the following warning when calling fgrep:
    #   'fgrep: warning: fgrep is obsolescent; using grep -F'
    # This occurs as output from fgrep which causes the ATLAS configuration
    # to get invalid return values, causing the configuration to fail
    # due to it assuming the first value is the number it's looking for.
    #
    # If the version of grep is 3.8 or newer, we have to patch the ATLAS
    # source code to get rid of the warning produced by fgrep.
    - name: Checking installed software.
      ansible.builtin.package_facts:
        manager: auto

    - name: Patch ATLAS source for grep 3.8+.
      ansible.builtin.include_tasks: patch-atlas-src.yml
      when: ansible_facts.packages['grep'][0].version is version("3.8", ">=")

    # Out-of-tree build: configure is invoked from the separate atlas-build
    # directory created earlier.
    - name: Install ATLAS (takes a LONG time).
      ansible.builtin.command: "{{ item }}"
      args:
        chdir: "{{ hpl_root }}/tmp/atlas-build"
        creates: "{{ hpl_root }}/tmp/COMPILE_ATLAS_COMPLETE"
      loop:
        - ../ATLAS/configure
        - make

    - name: Create 'COMPILE_ATLAS_COMPLETE' file.
      ansible.builtin.file:
        path: "{{ hpl_root }}/tmp/COMPILE_ATLAS_COMPLETE"
        state: touch
        mode: "0644"
        modification_time: preserve
        access_time: preserve

    # Fetched over HTTPS: the archive is compiled and executed on every node,
    # so it should not travel over an unauthenticated connection.
    - name: Download HPL (High Performance Linpack).
      ansible.builtin.unarchive:
        src: https://www.netlib.org/benchmark/hpl/hpl-2.3.tar.gz
        dest: "{{ hpl_root }}/tmp"
        remote_src: true
        creates: "{{ hpl_root }}/tmp/hpl-2.3/README"

    # No shell features are needed here, so `command` is sufficient.
    - name: Set up HPL makefile.
      ansible.builtin.command: sh make_generic
      args:
        chdir: "{{ hpl_root }}/tmp/hpl-2.3/setup"
        creates: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE"

    - name: Copy rpi makefile into place.
      ansible.builtin.template:
        src: templates/benchmark-Make.rpi.j2
        dest: "{{ hpl_root }}/tmp/hpl-2.3/Make.rpi"
        mode: "0644"

    # `arch=rpi` selects the Make.rpi file templated by the previous task.
    - name: Install HPL.
      ansible.builtin.command: >-
        make arch=rpi
      args:
        chdir: "{{ hpl_root }}/tmp/hpl-2.3"
        creates: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE"

    - name: Create COMPILE_HPL_COMPLETE file.
      ansible.builtin.file:
        path: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE"
        state: touch
        mode: "0644"
        modification_time: preserve
        access_time: preserve

# See: https://github.com/geerlingguy/top500-benchmark/issues/1
- name: Configure SSH connections between nodes.
  # Sets up SSH between all cluster members: each host gets a keypair, every
  # host's public key is authorized on every host, host keys are accepted by
  # connecting to each node once, and /etc/hosts gains an entry per node.
  #
  # `ssh_user` and `ssh_user_home` are not defined in this play; presumably
  # they come from config.yml (verify there).
  hosts: cluster
  become: false
  tags:
    - ssh

  vars_files:
    - config.yml

  vars:
    # Starting value for the list that 'Generate list of host IP addresses.'
    # extends one address at a time.
    host_ips: []

  tasks:
    - name: Generate an OpenSSH keypair.
      community.crypto.openssh_keypair:
        size: 2048
        path: "{{ ssh_user_home }}/.ssh/id_rsa"

    # Read-only step, hence never reported as changed.
    - name: Read out ssh pubkey from each host.
      ansible.builtin.command: cat "{{ ssh_user_home }}/.ssh/id_rsa.pub"
      register: ssh_pubkey
      changed_when: false

    # Collects the registered `ssh_pubkey.stdout` of every host in the play;
    # evaluated once and shared with all hosts.
    - name: Combine pubkeys into single list.
      run_once: true
      ansible.builtin.set_fact:
        combined_ssh_pubkeys: >-
          {{ ansible_play_hosts
          | map('extract', hostvars, 'ssh_pubkey')
          | map(attribute='stdout')
          | list }}

    - name: Write all pubkeys to each host.
      ansible.posix.authorized_key:
        key: "{{ item }}"
        user: "{{ ssh_user }}"
        state: present
      loop: "{{ combined_ssh_pubkeys }}"

    # Appends the default IPv4 address of each member of the `cluster` group.
    - name: Generate list of host IP addresses.
      ansible.builtin.set_fact:
        host_ips: "{{ host_ips + [ hostvars[item].ansible_default_ipv4.address ] }}"
      loop: "{{ groups['cluster'] }}"

    # Running a trivial remote command (`date`) with accept-new records the
    # target's host key without prompting.
    - name: Accept hostkeys for each host on each host.
      ansible.builtin.command: "ssh {{ ssh_user }}@{{ item }} -o StrictHostKeyChecking=accept-new date"
      loop: "{{ host_ips }}"

    # One managed block per node, distinguished by the node name in the
    # marker; the second name on the line is the node name minus '.local'.
    - name: Write chunk of hosts information to the hosts file.
      become: true
      ansible.builtin.blockinfile:
        path: /etc/hosts
        marker: "# {mark} Ansible MPI host {{ item }}"
        block: |
          {{ hostvars[item].ansible_default_ipv4.address }} {{ item }} {{ item | replace('.local', '') }}
      loop: "{{ groups['cluster'] }}"

- name: Run linpack benchmark.
  # Renders the MPI host list and HPL.dat into the HPL build directory on
  # every node, runs `mpirun` once for the whole cluster, and prints the
  # captured standard output.
  #
  # `hpl_root` is expected to come from config.yml.
  hosts: cluster
  become: false
  tags: ['benchmark']

  vars_files: ['config.yml']

  tasks:
    - name: Create a file describing nodes for MPI execution.
      ansible.builtin.template:
        src: templates/mpi-node-config.j2
        dest: "{{ hpl_root }}/tmp/hpl-2.3/bin/rpi/cluster-hosts"
        mode: "0644"

    # Template originally generated using this website:
    # https://www.advancedclustering.com/act_kb/tune-hpl-dat-file/
    - name: Create HPL.dat file.
      ansible.builtin.template:
        src: templates/HPL.dat.j2
        dest: "{{ hpl_root }}/tmp/hpl-2.3/bin/rpi/HPL.dat"
        mode: "0644"

    # The firewall changes and the benchmark are grouped in a block so the
    # reset in `always` runs even when mpirun fails; otherwise a failed run
    # would leave RedHat-family nodes with the benchmark firewall settings.
    - name: Run the benchmark with the firewall temporarily reconfigured.
      block:
        # If this is not done, the nodes will fail to connect to each other
        # causing the playbook to hang at 'Run the benchmark.'
        - name: Apply the benchmark firewall configuration.
          ansible.builtin.include_tasks: firewall/configure-firewall.yml
          when: ansible_os_family == "RedHat"

        # Launched from a single host; MPI starts the processes on the
        # nodes listed in the cluster-hosts file.
        - name: Run the benchmark.
          ansible.builtin.command: mpirun -f cluster-hosts ./xhpl
          args:
            chdir: "{{ hpl_root }}/tmp/hpl-2.3/bin/rpi"
          register: mpirun_output
          run_once: true

      always:
        - name: Reset the firewall configuration.
          ansible.builtin.include_tasks: firewall/reset-firewall.yml
          when: ansible_os_family == "RedHat"

    - name: Output the results.
      ansible.builtin.debug:
        var: mpirun_output.stdout