-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReactionDiffusion.cpp
369 lines (295 loc) · 16.1 KB
/
ReactionDiffusion.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
#include <iostream>
#include <iomanip>
#include <fstream>
#include "ReactionDiffusion.h"
using namespace std;
/**
* @brief Takes in the command-line arguments as parsed by boost_program_options and stores
* their values in the class members. Allocates memory to solutions fields. Pre-calculates any
* variables that will be repeatedly used in time integration. Calculates number of time-steps
* required to perform the desired time integration.
* @param arg_dt Integration time-step.
* @param arg_T Total integration time.
* @param arg_Nx Number of grid-points/nodes along x direction.
* @param arg_Ny Number of grid-points/nodes along y direction
* @param arg_a Parameter 'a' of Barkley model problem.
* @param arg_b Parameter 'b' of Barkley model problem.
* @param arg_mu1 Diffusion coefficient for 'u'.
* @param arg_mu2 Diffusion coefficient for 'v'.
* @param arg_eps Parameter 'eps' of Barkley model problem.
*/
void ReactionDiffusion::SetParameters(
const double& arg_dt, const int& arg_T,
const int& arg_Nx, const int& arg_Ny,
const double& arg_a, const double& arg_b,
const double& arg_mu1, const double& arg_mu2,
const double& arg_eps) {
// Storing arguments in class variables.
dt = arg_dt;
T = arg_T;
Nx = arg_Nx;
Ny = arg_Ny;
a = arg_a;
b = arg_b;
mu1 = arg_mu1;
mu2 = arg_mu2;
eps = arg_eps;
// Pre-computing helpful variables, as specified in header file.
recip_a = 1.0 / a;
u_grad_coef = mu1 * dt;
v_grad_coef = mu2 * dt;
dt_eps = dt * eps;
b_over_a = b / a;
half_a = a / 2.0;
Lx_index = (Nx-1);
Ly_index = Nx * (Ny-1);
// Allocating memory for the solution fields.
u = new double[Nx*Ny];
v = new double[Nx*Ny];
u_next = new double[Nx*Ny];
v_next = new double[Nx*Ny];
// Since we are technically performing a division between an int T and a double dt
// could be more careful/robust to account for the cases where T is not divisible
// by dt. However, in the handout it is taken that T=100 and dt=0.001 all the time,
// so this never becomes a problem
nr_timesteps = T / dt;
// Printing values of all the parameters to the terminal.
cout << endl;
cout << "Parameters of PDE problem to solve:" << endl;
cout << left << setw(18) << " - dt" << dt << endl;
cout << left << setw(18) << " - T:" << T << endl;
cout << left << setw(18) << " - Nx:" << Nx << endl;
cout << left << setw(18) << " - Ny:" << Ny << endl;
cout << left << setw(18) << " - a:" << a << endl;
cout << left << setw(18) << " - b:" << b << endl;
cout << left << setw(18) << " - mu1:" << mu1 << endl;
cout << left << setw(18) << " - mu2:" << mu2 << endl;
cout << left << setw(18) << " - epsilon:" << eps << endl;
cout << endl;
}
/**
* @brief Populates the 'u','v' arrays with their initial conditions. These are stored in
* column-major format.
*/
void ReactionDiffusion::SetInitialConditions() {
// Since this is only called once, order of loops is negligible for performance.
// Nevertheless, for column-major matrices, it is best to have the inner loop be over
// the rows (i.e. make the row index be the fastest changing index) as this way we
// exploit cache-locality.
// Throughout the code, 'i' is used for the row index, 'j' for the column index,
// 'Nx' is the number of rows and 'Ny' the number of columns.
for (int j = 0; j < Ny; ++j) {
for (int i = 0; i < Nx; ++i) {
// If y > Ly/2 (note that Ly = (Ny-1)*dy, where we are taking dy=1).
if (j > (Ny-1)/2.0) {
u[i + j*Nx] = 1.0;
}
else{
u[i + j*Nx] = 0.0;
}
// If x < Lx/2 (note that Lx = (Nx-1)*dx, where we are taking dx=1).
if (i < (Nx-1)/2.0) {
v[i + j*Nx] = half_a;
}
else{
v[i + j*Nx] = 0.0;
}
}
}
}
/**
* @brief Performs integration over time using the Explicit (Forward)
* Euler numerical scheme. A discrete update of the 'u','v' arrays is run \f$ n=T/dt \f$ times
* where \f$ dt \f$ is time-step used for integration and
* \f$ T \f$ the total integration time.
*/
void ReactionDiffusion::TimeIntegrate() {
cout << "Starting numerical solving of PDE.\n" << endl;
// Each iteration of this for-loop performs one update of the 'u','v' solution fields.
// i.e. a single iteration updates u^{n} -> u^{n+1} and v^{n} -> v^{n+1}.
for (int timestep = 0; timestep < nr_timesteps; ++timestep) {
// Single omp parallel, so there is only one forking/joining of threads per iteration.
#pragma omp parallel
{
// We are calculating firstly the update to the 'u' solution field and only
// later the update to the 'v' solution field. This is done to try to exploit cache-locality.
// Even though this is not perfect as for u_{i,j} we use v_{i,j} in the reaction term (and
// likewise for v_{i,j}), doing it this way provides performance improvements (especially
// in serial but also in parallel).
// Using single nowait for the corners so only a single thread executes them, and while it is doing so
// the other threads move forwards in the code.
// Corner (0, 0) (i=0, j=0)
#pragma omp single nowait
u_next[0] = u[0] + u_grad_coef * (u[1] + u[Nx] - 2*u[0])
+ dt_eps * u[0] * (1.0 - u[0]) * (u[0] - v[0] * recip_a - b_over_a);
// Corner (Lx, 0) (i=Nx-1, j=0) Recall: Lx_index = (Nx-1)
#pragma omp single nowait
u_next[Lx_index] = u[Lx_index] + u_grad_coef*(u[Lx_index - 1] + u[Lx_index + Nx] - 2*u[Lx_index])
+ dt_eps * u[Lx_index] * (1.0 - u[Lx_index]) * (u[Lx_index] - v[Lx_index] * recip_a - b_over_a);
// Corner (0, Ly) (i=0, j=Ny-1) Recall: Ly_index = Nx*(Ny-1)
#pragma omp single nowait
u_next[Ly_index] = u[Ly_index] + u_grad_coef*(u[Ly_index + 1] + u[Ly_index - Nx] - 2*u[Ly_index])
+ dt_eps * u[Ly_index] * (1.0 - u[Ly_index]) * (u[Ly_index] - v[Ly_index] * recip_a - b_over_a);
// Corner (Lx, Ly) (i=Nx-1, j=Ny-1)
#pragma omp single nowait
u_next[Lx_index + Ly_index] = u[Lx_index + Ly_index] + u_grad_coef*(u[Lx_index + Ly_index - 1] + u[Lx_index + Ly_index - Nx] - 2*u[Lx_index + Ly_index])
+ dt_eps * u[Lx_index+Ly_index] * (1.0 - u[Lx_index+Ly_index]) * (u[Lx_index+Ly_index] - v[Lx_index+Ly_index] * recip_a - b_over_a);
// Using nowait so there's no implicit synchronization/barrier at the end of the loop (would degrade performance).
// Edge BCs for fixed y values.
#pragma omp for nowait
for (int i = 1; i < (Nx-1); i++) {
// Along (y==0) (0<i<Nx-1, j=0)
u_next[i] = u[i] + u_grad_coef*(u[i+1] + u[i-1] + u[i + Nx] - 3*u[i])
+ dt_eps * u[i] * (1.0 - u[i]) * (u[i] - v[i] * recip_a - b_over_a);
// Along (y==Ly) (0<i<Nx-1, j=Ny-1)
u_next[i + Ly_index] = u[i + Ly_index] + u_grad_coef*(u[i+1 + Ly_index] + u[i-1 + Ly_index] + u[i - Nx + Ly_index] - 3*u[i + Ly_index])
+ dt_eps * u[i+Ly_index] * (1.0 - u[i+Ly_index]) * (u[i+Ly_index] - v[i+Ly_index] * recip_a - b_over_a);
}
// Fastest changing index is the row-index so as to maximize the exploitation of cache-locality.
// Central points (0<i<Nx-1, 0<j<Ny-1)
#pragma omp for nowait
for (int j = 1; j < (Ny-1); ++j) {
for (int i = 1; i < (Nx-1); ++i) {
u_next[i+j*Nx] = u[i+j*Nx] + u_grad_coef*(u[i+1 + j*Nx] + u[i-1 + j*Nx] + u[i+(j+1)*Nx] + u[i+(j-1)*Nx] - 4*u[i+j*Nx])
+ dt_eps * u[i+j*Nx] * (1.0 - u[i+j*Nx]) * (u[i+j*Nx] - v[i+j*Nx] * recip_a - b_over_a);
}
}
// Edge BCs for fixed x values.
#pragma omp for nowait
for (int j = 1; j < (Ny-1); j++) {
// Along (x==0) (i=0, 0<j<Ny-1)
u_next[j*Nx] = u[j*Nx] + u_grad_coef*(u[1 + j*Nx] + u[(j+1)*Nx] + u[(j-1)*Nx] - 3*u[j*Nx])
+ dt_eps * u[j*Nx] * (1.0 - u[j*Nx]) * (u[j*Nx] - v[j*Nx] * recip_a - b_over_a);
// Along (x==Lx) (i=Nx-1, 0<j<Ny-1)
u_next[Lx_index + j*Nx] = u[Lx_index + j*Nx] + u_grad_coef*(u[Lx_index - 1 + j*Nx] + u[Lx_index + (j+1)*Nx] + u[Lx_index + (j-1)*Nx] - 3*u[Lx_index + j*Nx])
+ dt_eps * u[Lx_index + j*Nx] * (1.0 - u[Lx_index + j*Nx]) * (u[Lx_index + j*Nx] - v[Lx_index + j*Nx] * recip_a - b_over_a);
}
// Now calculating the updates for the 'v' field.
// Corner (0, 0) (i=0, j=0)
#pragma omp single nowait
v_next[0] = v[0] + v_grad_coef * (v[1] + v[Nx] - 2*v[0])
+ dt * (u[0] * u[0] * u[0] - v[0]);
// Corner (Lx, 0) (i=Nx-1, j=0) Lx_index = (Nx-1)
#pragma omp single nowait
v_next[Lx_index] = v[Lx_index] + v_grad_coef*(v[Lx_index - 1] + v[Lx_index + Nx] - 2*v[Lx_index])
+ dt * (u[Lx_index] * u[Lx_index] * u[Lx_index] - v[Lx_index]);
// Corner (0, Ly) (i=0, j=Ny-1) Ly_index = Nx*(Ny-1)
#pragma omp single nowait
v_next[Ly_index] = v[Ly_index] + v_grad_coef*(u[Ly_index + 1] + v[Ly_index - Nx] - 2*v[Ly_index])
+ dt * (u[Ly_index] * u[Ly_index] * u[Ly_index] - v[Ly_index]);
// Corner (Lx, Ly) (i=Nx-1, j=Ny-1)
#pragma omp single nowait
v_next[Lx_index + Ly_index] = v[Lx_index + Ly_index] + v_grad_coef*(v[Lx_index + Ly_index - 1] + v[Lx_index + Ly_index - Nx] - 2*v[Lx_index + Ly_index])
+ dt * (u[Lx_index+Ly_index] * u[Lx_index+Ly_index] * u[Lx_index+Ly_index] - v[Lx_index+Ly_index]);
// Edge BCs for fixed y values.
#pragma omp for nowait
for (int i = 1; i < (Nx-1); i++) {
// Along (y==0) (0<i<Nx, j=0)
v_next[i] = v[i] + v_grad_coef*(v[i+1] + v[i-1] + v[i + Nx] - 3*v[i])
+ dt * (u[i] * u[i] * u[i] - v[i]);
// Along (y==Ly) (0<i<Nx, j=Ny-1)
v_next[i + Ly_index] = v[i+Ly_index] + v_grad_coef*(v[i+1 + Ly_index] + v[i-1 + Ly_index] + v[i - Nx + Ly_index] - 3*v[i + Ly_index])
+ dt * (u[i+Ly_index] * u[i+Ly_index] * u[i+Ly_index] - v[i+Ly_index]);
}
// Central points (0<i<Nx-1, 0<j<Ny-1)
#pragma omp for nowait
for (int j = 1; j < (Ny-1); ++j) {
for (int i = 1; i < (Nx-1); ++i) {
v_next[i+j*Nx] = v[i+j*Nx] + v_grad_coef*(v[i+1 + j*Nx] + v[i-1 + j*Nx] + v[i+(j+1)*Nx] + v[i+(j-1)*Nx] - 4*v[i+j*Nx])
+ dt * (u[i+j*Nx] * u[i+j*Nx] * u[i+j*Nx] - v[i+j*Nx]);
}
}
// Edge BCs for fixed x values.
#pragma omp for nowait
for (int j = 1; j < (Ny-1); j++) {
// Along (x==Lx) (i=Nx-1, 0<j<Ny-1)
v_next[Lx_index + j*Nx] = v[Lx_index + j*Nx] + v_grad_coef*(v[Lx_index - 1 + j*Nx] + v[Lx_index + (j+1)*Nx] + v[Lx_index + (j-1)*Nx] - 3*v[Lx_index + j*Nx])
+ dt * (u[Lx_index+j*Nx] * u[Lx_index+j*Nx] * u[Lx_index+j*Nx] - v[Lx_index+j*Nx]);
// Along (x==0) (i=0, 0<j<Ny-1)
v_next[j*Nx] = v[j*Nx] + v_grad_coef*(v[1 + j*Nx] + v[(j+1)*Nx] + v[(j-1)*Nx] - 3*v[j*Nx])
+ dt * (u[j*Nx] * u[j*Nx] * u[j*Nx] - v[j*Nx]);
}
} // End of pragma parallel region (there is an implicit barrier)
// All the threads have finished running due to implicit barrier above (i.e. all the grid-points have been
// updated from time-step n to time-step n+1. Thus we can now store the calculated u^{n+1} = u_next
// and v^{n+1} = v_next into 'u','v' and prepare for following time-step.
// Note: we are also passing the 'old' 'u','v' into 'u_next' and 'v_next' but this is fine since
// when evaluating 'u_next' in the following time-step we will solely use the values of 'u' and not any
// values in 'u_next'.
swap(u, u_next);
swap(v, v_next);
// Providing feedback to the user of how fast the program is running during execution.
// Has negligible performance impact so was left in.
if (timestep % 10000 == 0) {
cout << "Time-step: " << right << setw(6) << timestep
<< "/" << nr_timesteps
<< " (" << right << setw(2) << 100*timestep/nr_timesteps << "%)" << endl;
}
} // End of time-loop (i.e. finished integrating over time).
cout << "\nFinished solving PDE (from t_i = 0 to t_f = T).\n" << endl;
}
/**
* @brief Responsible for saving results of numerical simulation to a text
* file called 'output.txt'.
*
* To visualize the result of the simulation one can run the following commands:
*
* $ gnuplot
*
* $ set pm3d at st
*
* $ set view map
*
* $ set cbrange[0:1]
*
* and then
*
* $ splot 'output.txt' using 1:2:3 w l palette
*
* to plot 'u' over the domain, or instead
*
* $ splot 'output.txt' using 1:2:4 w l palette
*
* to plot 'v' over the domain.
*
* For the test4 use '$ xrange[0:150]' to account for the non-square domain.
*
*/
void ReactionDiffusion::SaveToFile() {
cout << "Writing output of simulation to file 'output.txt'." << endl;
ofstream vOut("output.txt", ios::out | ios::trunc);
// Checking that file opened successfully.
if (vOut.is_open()) {
cout << "File opened successfully." << endl;
// Writing solution row-by-row (x y u v)
// Not super efficient since vectors are stored column-wise.
// One could instead store the vectors row-wise to improve performance here but
// since this is only called once, it was deemed that the potential performance
// increase (through cache-locality) was not worth it to change from the convention
// of storing matrices column-wise.
for (int j = 0; j < Ny; ++j) {
for (int i = 0; i < Nx; ++i){
vOut << i << " "
<< j << " "
<< u[i + j*Nx] << " "
<< v[i + j*Nx] << endl;
}
vOut << endl; // Empty line after each row of points
}
}
else {
cout << "Did not open vOut successfully!" << endl;
}
vOut.close(); // Closing file.
cout << "Finished writing to file." << endl;
}
/**
* @brief Performs clean up duties.
*/
ReactionDiffusion::~ReactionDiffusion() {
// De-allocating memory.
delete[] u;
delete[] v;
delete[] u_next;
delete[] v_next;
}