From 98ac2f75419e9f681a6325e36acc8350bb6f80b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Januszewski?= Date: Thu, 20 Dec 2012 14:04:31 +0100 Subject: [PATCH] Split Shan-Chen and Free-Energy code into two kernels, one per lattice. --- examples/sc_drop.py | 2 +- examples/sc_phase_separation.py | 2 +- sailfish/backend_cuda.py | 1 + sailfish/lb_base.py | 13 +- sailfish/lb_binary.py | 255 ++++++++++++----- sailfish/lb_single.py | 28 +- sailfish/subdomain_runner.py | 33 +-- sailfish/sym_force.py | 1 - sailfish/templates/binary_fluid.mako | 324 +++++++++++----------- sailfish/templates/binary_shan_chen.mako | 121 ++++++++ sailfish/templates/boundary.mako | 5 +- sailfish/templates/kernel_common.mako | 33 ++- sailfish/templates/relaxation.mako | 161 +++++------ sailfish/templates/relaxation_common.mako | 116 ++++---- sailfish/templates/shan_chen.mako | 156 ++--------- sailfish/templates/single_fluid.mako | 38 +-- 16 files changed, 702 insertions(+), 587 deletions(-) create mode 100644 sailfish/templates/binary_shan_chen.mako diff --git a/examples/sc_drop.py b/examples/sc_drop.py index b9dd2833..26b9cb3c 100755 --- a/examples/sc_drop.py +++ b/examples/sc_drop.py @@ -23,7 +23,7 @@ def update_defaults(cls, defaults): defaults.update({ 'lat_nx': 256, 'lat_ny': 256, - 'G': 5.0, + 'G': -5.0, 'visc': 1.0 / 6.0, 'periodic_x': True, 'periodic_y': True, diff --git a/examples/sc_phase_separation.py b/examples/sc_phase_separation.py index b4735893..9ecbab69 100755 --- a/examples/sc_phase_separation.py +++ b/examples/sc_phase_separation.py @@ -24,7 +24,7 @@ def update_defaults(cls, defaults): defaults.update({ 'lat_nx': 256, 'lat_ny': 256, - 'G': 5.0, + 'G': -5.0, 'visc': 1.0 / 6.0, 'periodic_x': True, 'periodic_y': True, diff --git a/sailfish/backend_cuda.py b/sailfish/backend_cuda.py index d9be644c..27f0b6b3 100644 --- a/sailfish/backend_cuda.py +++ b/sailfish/backend_cuda.py @@ -199,6 +199,7 @@ def get_kernel(self, prog, name, block, args, args_format, shared=0, if needs_iteration: args_format += 'i' + args = list(args) args.append(0) self._iteration_kernels.append(kern) diff --git a/sailfish/lb_base.py b/sailfish/lb_base.py index 0cbabe47..d8051cfa 100644 --- a/sailfish/lb_base.py +++ b/sailfish/lb_base.py @@ -13,6 +13,7 @@ FieldPair = namedtuple('FieldPair', 'abstract buffer') ForcePair = namedtuple('ForcePair', 'numeric symbolic') +KernelPair = namedtuple('KernelPair', 'primary secondary') class LBSim(object): """Describes a specific type of a lattice Boltzmann simulation.""" @@ -173,7 +174,14 @@ def after_step(self, runner): pass def get_compute_kernels(self, runner, full_output, bulk): - return [] + """ + :param runner: SubdomainRunner object + :param full_output: if True, returns kernels that prepare fields for + visualization or saving into a file + :param bulk: if True, returns kernels that process the bulk domain, + otherwise returns kernels that process the subdomain boundary + """ + return KernelPair(None, None) def get_pbc_kernels(self, runner): return [] @@ -258,6 +266,9 @@ def use_force_for_equilibrium(self, force_grid, target_grid): To disable acceleration on a grid, pass an invalid grid ID in force_grid (e.g. None or -1). + Note: this is currently only supported in the free-energy MRT model. + The force reassignment will be silently ignored in other models. + :param force_grid: grid ID from which the acceleration will be used :param target_grid: grid ID on which the acceleration will act """ diff --git a/sailfish/lb_binary.py b/sailfish/lb_binary.py index e4c5bd32..23380edf 100644 --- a/sailfish/lb_binary.py +++ b/sailfish/lb_binary.py @@ -8,7 +8,7 @@ from functools import partial import numpy as np from sailfish import subdomain_runner, sym, sym_equilibrium -from sailfish.lb_base import LBSim, LBForcedSim, ScalarField, VectorField +from sailfish.lb_base import LBSim, LBForcedSim, ScalarField, VectorField, KernelPair from sailfish.lb_single import MacroKernels @@ -92,74 +92,6 @@ def get_pbc_kernels(self, runner): ret = MacroKernels(macro=macro_kernels, distributions=dist_kernels) return ret - def get_compute_kernels(self, runner, full_output, bulk): - gpu_rho = runner.gpu_field(self.rho) - gpu_phi = runner.gpu_field(self.phi) - gpu_v = runner.gpu_field(self.v) - gpu_map = runner.gpu_geo_map() - - gpu_dist1a = runner.gpu_dist(0, 0) - gpu_dist1b = runner.gpu_dist(0, 1) - gpu_dist2a = runner.gpu_dist(1, 0) - gpu_dist2b = runner.gpu_dist(1, 1) - - options = 0 - if full_output: - options |= 1 - if bulk: - options |= 2 - - options = np.uint32(options) - args1 = [gpu_map, gpu_dist1a, gpu_dist1b, gpu_dist2a, gpu_dist2b, - gpu_rho, gpu_phi] + gpu_v + [options] - args2 = [gpu_map, gpu_dist1b, gpu_dist1a, gpu_dist2b, gpu_dist2a, - gpu_rho, gpu_phi] + gpu_v + [options] - - macro_args1 = [gpu_map, gpu_dist1a, gpu_dist2a, gpu_rho, gpu_phi, - options] - macro_args2 = [gpu_map, gpu_dist1b, gpu_dist2b, gpu_rho, gpu_phi, - options] - - args_signature = 'P' * (len(args1) - 1) + 'i' - macro_signature = 'P' * (len(macro_args1) - 1) + 'i' - - if runner.gpu_scratch_space is not None: - macro_args1.append(runner.gpu_scratch_space) - macro_args2.append(runner.gpu_scratch_space) - macro_signature += 'P' - - args1.append(runner.gpu_scratch_space) - args2.append(runner.gpu_scratch_space) - args_signature += 'P' - - macro_kernels = [ - runner.get_kernel('PrepareMacroFields', macro_args1, - macro_signature, - needs_iteration=self.config.needs_iteration_num)] - - if self.config.access_pattern == 'AB': - macro_kernels.append( - runner.get_kernel('PrepareMacroFields', macro_args2, - macro_signature, - needs_iteration=self.config.needs_iteration_num)) - else: - macro_kernels.append(macro_kernels[-1]) - - sim_kernels = [ - runner.get_kernel('CollideAndPropagate', args1, - args_signature, - needs_iteration=self.config.needs_iteration_num)] - - if self.config.access_pattern == 'AB': - sim_kernels.append( - runner.get_kernel('CollideAndPropagate', args2, - args_signature, - needs_iteration=self.config.needs_iteration_num)) - else: - sim_kernels.append(sim_kernels[-1]) - - return zip(macro_kernels, sim_kernels) - def initial_conditions(self, runner): gpu_rho = runner.gpu_field(self.rho) gpu_phi = runner.gpu_field(self.phi) @@ -203,7 +135,8 @@ def constants(self): @classmethod def fields(cls): - return [ScalarField('rho'), ScalarField('phi', need_nn=True), VectorField('v')] + return [ScalarField('rho'), ScalarField('phi', need_nn=True), + VectorField('v'), ScalarField('phi_laplacian')] @classmethod def add_options(cls, group, dim): @@ -318,6 +251,101 @@ def _prepare_symbols(self): self.S.wxx.append(-Rational(1, 24)) self.S.wyy.append(-Rational(1, 24)) + def get_compute_kernels(self, runner, full_output, bulk): + gpu_rho = runner.gpu_field(self.rho) + gpu_phi = runner.gpu_field(self.phi) + gpu_lap = runner.gpu_field(self.phi_laplacian) + gpu_v = runner.gpu_field(self.v) + gpu_map = runner.gpu_geo_map() + + gpu_dist1a = runner.gpu_dist(0, 0) + gpu_dist1b = runner.gpu_dist(0, 1) + gpu_dist2a = runner.gpu_dist(1, 0) + gpu_dist2b = runner.gpu_dist(1, 1) + + options = 0 + if full_output: + options |= 1 + if bulk: + options |= 2 + + if hasattr(self, '_force_term_for_eq') and self._force_term_for_eq.get(1) == 0: + phi_args = [gpu_rho, gpu_phi] + else: + phi_args = [gpu_phi] + + options = np.uint32(options) + # Primary. + args1a = ([gpu_map, gpu_dist1a, gpu_dist1b, gpu_rho, gpu_phi] + + gpu_v + [gpu_lap, options]) + args1b = ([gpu_map, gpu_dist2a, gpu_dist2b] + phi_args + + gpu_v + [gpu_lap, options]) + # Secondary. + args2a = ([gpu_map, gpu_dist1b, gpu_dist1a, gpu_rho, gpu_phi] + + gpu_v + [gpu_lap, options]) + args2b = ([gpu_map, gpu_dist2b, gpu_dist2a] + phi_args + + gpu_v + [gpu_lap, options]) + + macro_args1 = [gpu_map, gpu_dist1a, gpu_dist2a, gpu_rho, gpu_phi, + options] + macro_args2 = [gpu_map, gpu_dist1b, gpu_dist2b, gpu_rho, gpu_phi, + options] + + args_a_signature = 'P' * (len(args1a) - 1) + 'i' + args_b_signature = 'P' * (len(args1b) - 1) + 'i' + macro_signature = 'P' * (len(macro_args1) - 1) + 'i' + + if runner.gpu_scratch_space is not None: + macro_args1.append(runner.gpu_scratch_space) + macro_args2.append(runner.gpu_scratch_space) + macro_signature += 'P' + + args1a.append(runner.gpu_scratch_space) + args2a.append(runner.gpu_scratch_space) + args1b.append(runner.gpu_scratch_space) + args2b.append(runner.gpu_scratch_space) + args_a_signature += 'P' + args_b_signature += 'P' + + macro = runner.get_kernel('FreeEnergyPrepareMacroFields', macro_args1, + macro_signature, + needs_iteration=self.config.needs_iteration_num) + + if self.config.access_pattern == 'AB': + macro_secondary = runner.get_kernel('FreeEnergyPrepareMacroFields', + macro_args2, + macro_signature, + needs_iteration=self.config.needs_iteration_num) + macro_pair = KernelPair(macro, macro_secondary) + else: + macro_pair = KernelPair(macro, macro) + + # Note: these two kernels need to be executed in order. + primary = [ + runner.get_kernel('FreeEnergyCollideAndPropagateFluid', args1a, + args_a_signature, + needs_iteration=self.config.needs_iteration_num), + runner.get_kernel('FreeEnergyCollideAndPropagateOrderParam', args1b, + args_b_signature, + needs_iteration=self.config.needs_iteration_num) + ] + + if self.config.access_pattern == 'AB': + secondary = [ + runner.get_kernel('FreeEnergyCollideAndPropagateFluid', args2a, + args_a_signature, + needs_iteration=self.config.needs_iteration_num), + runner.get_kernel('FreeEnergyCollideAndPropagateOrderParam', + args2b, + args_b_signature, + needs_iteration=self.config.needs_iteration_num) + ] + sim_pair = KernelPair(primary, secondary) + else: + sim_pair = KernelPair(primary, primary) + + return zip(macro_pair, sim_pair) + class LBBinaryFluidShanChen(LBBinaryFluidBase, LBForcedSim): """Binary fluid mixture using the Shan-Chen model.""" @@ -359,3 +387,90 @@ def update_context(self, ctx): ctx['sc_potential'] = self.config.sc_potential ctx['tau'] = sym.relaxation_time(self.config.visc) ctx['visc'] = self.config.visc + + def get_compute_kernels(self, runner, full_output, bulk): + gpu_rho = runner.gpu_field(self.rho) + gpu_phi = runner.gpu_field(self.phi) + gpu_v = runner.gpu_field(self.v) + gpu_map = runner.gpu_geo_map() + + gpu_dist1a = runner.gpu_dist(0, 0) + gpu_dist1b = runner.gpu_dist(0, 1) + gpu_dist2a = runner.gpu_dist(1, 0) + gpu_dist2b = runner.gpu_dist(1, 1) + + options = 0 + if full_output: + options |= 1 + if bulk: + options |= 2 + + options = np.uint32(options) + # Primary. + args1a = ([gpu_map, gpu_dist1a, gpu_dist1b, gpu_rho, gpu_phi] + + gpu_v + [options]) + args1b = ([gpu_map, gpu_dist2a, gpu_dist2b, gpu_rho, gpu_phi] + + gpu_v + [options]) + # Secondary. + args2a = ([gpu_map, gpu_dist1b, gpu_dist1a, gpu_rho, gpu_phi] + + gpu_v + [options]) + args2b = ([gpu_map, gpu_dist2b, gpu_dist2a, gpu_rho, gpu_phi] + + gpu_v + [options]) + + macro_args1 = ([gpu_map, gpu_dist1a, gpu_dist2a, gpu_rho, gpu_phi] + + gpu_v + [options]) + macro_args2 = ([gpu_map, gpu_dist1b, gpu_dist2b, gpu_rho, gpu_phi] + + gpu_v + [options]) + + args_a_signature = 'P' * (len(args1a) - 1) + 'i' + args_b_signature = 'P' * (len(args1b) - 1) + 'i' + macro_signature = 'P' * (len(macro_args1) - 1) + 'i' + + if runner.gpu_scratch_space is not None: + macro_args1.append(runner.gpu_scratch_space) + macro_args2.append(runner.gpu_scratch_space) + macro_signature += 'P' + + args1a.append(runner.gpu_scratch_space) + args2a.append(runner.gpu_scratch_space) + args1b.append(runner.gpu_scratch_space) + args2b.append(runner.gpu_scratch_space) + args_a_signature += 'P' + args_b_signature += 'P' + + macro = runner.get_kernel('ShanChenPrepareMacroFields', macro_args1, + macro_signature, + needs_iteration=self.config.needs_iteration_num) + + if self.config.access_pattern == 'AB': + macro_secondary = runner.get_kernel('ShanChenPrepareMacroFields', macro_args2, + macro_signature, + needs_iteration=self.config.needs_iteration_num) + macro_pair = KernelPair(macro, macro_secondary) + else: + macro_pair = KernelPair(macro, macro) + + # TODO(michalj): These kernels can actually run in parallel. + primary = [ + runner.get_kernel('ShanChenCollideAndPropagate0', args1a, + args_a_signature, + needs_iteration=self.config.needs_iteration_num), + runner.get_kernel('ShanChenCollideAndPropagate1', args1b, + args_b_signature, + needs_iteration=self.config.needs_iteration_num) + ] + + if self.config.access_pattern == 'AB': + secondary = [ + runner.get_kernel('ShanChenCollideAndPropagate0', args2a, + args_a_signature, + needs_iteration=self.config.needs_iteration_num), + runner.get_kernel('ShanChenCollideAndPropagate1', args2b, + args_b_signature, + needs_iteration=self.config.needs_iteration_num) + ] + sim_pair = KernelPair(primary, secondary) + else: + sim_pair = KernelPair(primary, primary) + + return zip(macro_pair, sim_pair) diff --git a/sailfish/lb_single.py b/sailfish/lb_single.py index 5136806b..adb92409 100644 --- a/sailfish/lb_single.py +++ b/sailfish/lb_single.py @@ -8,7 +8,7 @@ import numpy as np from sailfish import subdomain_runner, sym, sym_equilibrium -from sailfish.lb_base import LBSim, LBForcedSim, ScalarField, VectorField +from sailfish.lb_base import LBSim, LBForcedSim, ScalarField, VectorField, KernelPair MacroKernels = namedtuple('MacroKernels', 'distributions macro') @@ -71,13 +71,6 @@ def initial_conditions(self, runner): runner.exec_kernel('SetInitialConditions', args2, 'P'*len(args2)) def get_compute_kernels(self, runner, full_output, bulk): - """ - :param runner: SubdomainRunner object - :param full_output: if True, returns kernels that prepare fields for - visualization or saving into a file - :param bulk: if True, returns kernels that process the bulk domain, - otherwise returns kernels that process the subdomain boundary - """ gpu_rho = runner.gpu_field(self.rho) gpu_v = runner.gpu_field(self.v) gpu_dist1a = runner.gpu_dist(0, 0) @@ -109,21 +102,18 @@ def get_compute_kernels(self, runner, full_output, bulk): args2.append(runner.gpu_field(self.alpha)) signature += 'P' - kernels = [] - - kernels.append(runner.get_kernel( - 'CollideAndPropagate', args1, signature, - needs_iteration=self.config.needs_iteration_num)) + cnp_primary = runner.get_kernel( + 'CollideAndPropagate', args1, signature, + needs_iteration=self.config.needs_iteration_num) if self.config.access_pattern == 'AB': secondary_args = args2 if self.config.access_pattern == 'AB' else args1 - kernels.append(runner.get_kernel( - 'CollideAndPropagate', secondary_args, signature, - needs_iteration=self.config.needs_iteration_num)) + cnp_secondary = runner.get_kernel( + 'CollideAndPropagate', secondary_args, signature, + needs_iteration=self.config.needs_iteration_num) + return KernelPair([cnp_primary], [cnp_secondary]) else: - kernels.append(kernels[-1]) - - return kernels + return KernelPair([cnp_primary], [cnp_primary]) def get_pbc_kernels(self, runner): gpu_dist1a = runner.gpu_dist(0, 0) diff --git a/sailfish/subdomain_runner.py b/sailfish/subdomain_runner.py index e980b2a2..58d04a60 100644 --- a/sailfish/subdomain_runner.py +++ b/sailfish/subdomain_runner.py @@ -766,9 +766,9 @@ def step(self, sync_req): def _get_bulk_kernel(self, sync_req): if sync_req: - kernel = self._kernels_bulk_full[self._sim.iteration & 1] + kernel = self._kernels_bulk_full[self._sim.iteration & 1][0] else: - kernel = self._kernels_bulk_none[self._sim.iteration & 1] + kernel = self._kernels_bulk_none[self._sim.iteration & 1][0] return kernel, self._kernel_grid_bulk @@ -824,9 +824,9 @@ def _step_boundary(self, sync_req): if self._boundary_blocks is not None: if sync_req: - kernel = self._kernels_bnd_full[self._sim.iteration & 1] + kernel = self._kernels_bnd_full[self._sim.iteration & 1][0] else: - kernel = self._kernels_bnd_none[self._sim.iteration & 1] + kernel = self._kernels_bnd_none[self._sim.iteration & 1][0] grid = self._boundary_blocks else: kernel, grid = self._get_bulk_kernel(sync_req) @@ -1224,15 +1224,13 @@ def restore_checkpoint(self, fname): self._debug_set_dist(v, is_primary, dist_num) - def _prepare_compute_kernels(self, initial=False): - self._kernels_bulk_full = self._sim.get_compute_kernels(self, True, - True) - self._kernels_bulk_none = self._sim.get_compute_kernels(self, False, - True) - self._kernels_bnd_full = self._sim.get_compute_kernels(self, True, - False) - self._kernels_bnd_none = self._sim.get_compute_kernels(self, False, - False) + def _prepare_compute_kernels(self): + gck = self._sim.get_compute_kernels + + self._kernels_bulk_full = gck(self, True, True) + self._kernels_bulk_none = gck(self, False, True) + self._kernels_bnd_full = gck(self, True, False) + self._kernels_bnd_none = gck(self, False, False) def run(self): self.config.logger.info("Initializing subdomain.") @@ -1676,9 +1674,11 @@ def step(self, sync_req): # Actual simulation step. record_gpu_start(TimeProfile.BOUNDARY, str_calc) if has_boundary_split: - run(bnd_kernel_sim, grid_bnd, str_calc) + for k in bnd_kernel_sim: + run(k, grid_bnd, str_calc) else: - run(bulk_kernel_sim, grid_bulk, str_calc) + for k in bulk_kernel_sim: + run(k, grid_bulk, str_calc) ev = record_gpu_end(TimeProfile.BOUNDARY, str_calc) str_data.wait_for_event(ev) @@ -1689,7 +1689,8 @@ def step(self, sync_req): record_gpu_start(TimeProfile.BULK, str_calc) if has_boundary_split: - run(bulk_kernel_sim, grid_bulk, str_calc) + for k in bulk_kernel_sim: + run(k, grid_bulk, str_calc) self._apply_pbc(self._pbc_kernels.distributions) record_gpu_end(TimeProfile.BULK, str_calc) diff --git a/sailfish/sym_force.py b/sailfish/sym_force.py index c490bbc2..ef90ee20 100644 --- a/sailfish/sym_force.py +++ b/sailfish/sym_force.py @@ -33,7 +33,6 @@ def needs_accel(i, forces, force_couplings): if type(forces) is Undefined: return False - # TODO: handle symbolic forces here return ((i in forces.numeric or i in forces.symbolic) or needs_coupling_accel(i, force_couplings)) diff --git a/sailfish/templates/binary_fluid.mako b/sailfish/templates/binary_fluid.mako index cfb9f81a..f72c7628 100644 --- a/sailfish/templates/binary_fluid.mako +++ b/sailfish/templates/binary_fluid.mako @@ -3,29 +3,38 @@ import sailfish.node_type as nt import sympy %> +<% + # Necessary to support force reassignment in the free energy MRT model. + phi_needs_rho = (force_for_eq is not UNDEFINED and force_for_eq.get(1) == 0) +%> -<%def name="bgk_args_decl_sc()"> - float rho, float phi, float *iv0, float *ea0, float *ea1 - - -<%def name="bgk_args_decl_fe()"> - float rho, float phi, float lap1, float *iv0, float *grad1 +<%def name="bgk_args_decl_sc(grid_idx)"> + %if grid_idx == 0: + float rho, float *iv0, float *ea${grid_idx} + %else: + float phi, float *iv0, float *ea${grid_idx} + %endif -<%def name="bgk_args_sc()"> - g0m0, g1m0, v, sca0, sca1 +<%def name="bgk_args_decl_fe(grid_idx)"> + %if grid_idx == 0: + float rho, float phi, float lap1, float *iv0, float *grad1 + %else: + ${'float rho, ' if phi_needs_rho else ''} float phi, float lap1, float *iv0 + %endif -<%def name="bgk_args_fe()"> - g0m0, g1m0, lap1, v, grad1 +<%def name="bgk_args_fe(grid_idx)"> + %if grid_idx == 0: + g0m0, g1m0, lap1, v, grad1 + %else: + ${'g0m0, ' if phi_needs_rho else ''} g1m0, lap1, v + %endif -// In the free-energy model, the relaxation time is a local quantity. %if simtype == 'shan-chen': + ## In the free-energy model, the relaxation time is a local quantity. ${const_var} float tau0 = ${tau}f; // relaxation time -%endif - -%if simtype == 'shan-chen': // Relaxation time for the 2nd fluid component. %else: // Relaxation time for the order parameter field. @@ -47,7 +56,9 @@ ${const_var} float tau1 = ${tau_phi}f; <%namespace file="propagation.mako" import="*"/> <%namespace file="utils.mako" import="*"/> +%if simtype == 'free-energy': <%include file="finite_difference_optimized.mako"/> +%endif <%def name="init_dist_with_eq()"> %if simtype == 'free-energy': @@ -95,7 +106,8 @@ ${kernel} void SetInitialConditions( ${init_dist_with_eq()} } -${kernel} void PrepareMacroFields( +%if simtype == 'free-energy': +${kernel} void FreeEnergyPrepareMacroFields( ${global_ptr} int *map, ${global_ptr} float *dist1_in, ${global_ptr} float *dist2_in, @@ -106,23 +118,7 @@ ${kernel} void PrepareMacroFields( ${iteration_number_if_required()}) { ${local_indices_split()} - - int ncode = map[gi]; - int type = decodeNodeType(ncode); - - // Unused nodes do not participate in the simulation. - if (isExcludedNode(type)) - return; - - int orientation = decodeNodeOrientation(ncode); - - %if simtype == 'shan-chen': - // Do not update the macroscopic fields for nodes which do not - // represent any fluid. - if (!isWetNode(type)) { - return; - } - %endif + ${load_node_type()} // Do not not update the fields for pressure nodes, where by definition // they are constant. @@ -135,99 +131,86 @@ ${kernel} void PrepareMacroFields( Dist fi; float out; - %if sim._fields['rho'].abstract.need_nn: - getDist(&fi, dist1_in, gi ${iteration_number_arg_if_required()}); + if (isWetNode(type)) { + getDist(&fi, dist2_in, gi ${iteration_number_arg_if_required()}); get0thMoment(&fi, type, orientation, &out); - orho[gi] = out; - %endif + ophi[gi] = out; + } - %if simtype == 'free-energy': - if (isWetNode(type)) { - getDist(&fi, dist2_in, gi ${iteration_number_arg_if_required()}); + ## Assume neutral wetting for all walls by adjusting the phase gradient + ## near the wall. + ## + ## This wetting boundary condition implementation is as in option 2 in + ## Halim Kusumaatmaja's PhD thesis, p.18. + + ## Symbols used on the schematics below: + ## + ## W: wall node (current node, pointed to by 'gi') + ## F: fluid node + ## |: actual location of the wall + ## .: space between fluid nodes + ## x: node from which data is read + ## y: node to which data is being written + ## + ## The schematics assume a bc_wall_grad_order of 2. + %if nt.NTFullBBWall in node_types: + int helper_idx = gi; + ## Full BB: F . F | W + ## x ----> y + if (isNTFullBBWall(type)) { + switch (orientation) { + %for dir in grid.dir2vecidx.keys(): + case ${dir}: { // ${grid.dir_to_vec(dir)} + %if dim == 3: + helper_idx += ${rel_offset(*(bc_wall_grad_order*grid.dir_to_vec(dir)))}; + %else: + ## rel_offset() needs a 3-vector, so make the z-coordinate 0 + helper_idx += ${rel_offset(*(list(bc_wall_grad_order*grid.dir_to_vec(dir)) + [0]))}; + %endif + break; + } + %endfor + } + getDist(&fi, dist2_in, helper_idx ${iteration_number_arg_if_required()}); get0thMoment(&fi, type, orientation, &out); - ophi[gi] = out; + ophi[gi] = out - (${bc_wall_grad_order*bc_wall_grad_phase}); } - - ## Assume neutral wetting for all walls by adjusting the phase gradient - ## near the wall. - ## - ## This wetting boundary condition implementation is as in option 2 in - ## Halim Kusumaatmaja's PhD thesis, p.18. - - ## Symbols used on the schematics below: - ## - ## W: wall node (current node, pointed to by 'gi') - ## F: fluid node - ## |: actual location of the wall - ## .: space between fluid nodes - ## x: node from which data is read - ## y: node to which data is being written - ## - ## The schematics assume a bc_wall_grad_order of 2. - %if nt.NTFullBBWall in node_types: - int helper_idx = gi; - ## Full BB: F . F | W - ## x ----> y - if (isNTFullBBWall(type)) { - switch (orientation) { - %for dir in grid.dir2vecidx.keys(): - case ${dir}: { // ${grid.dir_to_vec(dir)} - %if dim == 3: - helper_idx += ${rel_offset(*(bc_wall_grad_order*grid.dir_to_vec(dir)))}; - %else: - ## rel_offset() needs a 3-vector, so make the z-coordinate 0 - helper_idx += ${rel_offset(*(list(bc_wall_grad_order*grid.dir_to_vec(dir)) + [0]))}; - %endif - break; - } - %endfor - } - getDist(&fi, dist2_in, helper_idx ${iteration_number_arg_if_required()}); - get0thMoment(&fi, type, orientation, &out); - ophi[gi] = out - (${bc_wall_grad_order*bc_wall_grad_phase}); - } - %endif ## NTFullBBWall - %if nt.NTHalfBBWall in node_types: - %if bc_wall_grad_order != 1: - __ONLY_FIRST_ORDER_GRADIENTS_ARE_SUPPORTED_FOR_HALF_BB_WETTING_WALLS__ - %endif - int helper_idx = gi; - - ## Half-way BB: F . W | U - ## x ----> y - if (isNTHalfBBWall(type)) { - switch (orientation) { - %for dir in grid.dir2vecidx.keys(): - case ${dir}: { // ${grid.dir_to_vec(dir)} - %if dim == 3: - helper_idx -= ${rel_offset(*(grid.dir_to_vec(dir)))}; - %else: - helper_idx -= ${rel_offset(*(list(grid.dir_to_vec(dir)) + [0]))}; - %endif - break; - } - %endfor - } - - ophi[helper_idx] = out - (${bc_wall_grad_order*bc_wall_grad_phase}); - } + %endif ## NTFullBBWall + %if nt.NTHalfBBWall in node_types: + %if bc_wall_grad_order != 1: + __ONLY_FIRST_ORDER_GRADIENTS_ARE_SUPPORTED_FOR_HALF_BB_WETTING_WALLS__ %endif - %else: ## shan-chen - getDist(&fi, dist2_in, gi ${iteration_number_arg_if_required()}); - get0thMoment(&fi, type, orientation, &out); - ophi[gi] = out; + int helper_idx = gi; + + ## Half-way BB: F . W | U + ## x ----> y + if (isNTHalfBBWall(type)) { + switch (orientation) { + %for dir in grid.dir2vecidx.keys(): + case ${dir}: { // ${grid.dir_to_vec(dir)} + %if dim == 3: + helper_idx -= ${rel_offset(*(grid.dir_to_vec(dir)))}; + %else: + helper_idx -= ${rel_offset(*(list(grid.dir_to_vec(dir)) + [0]))}; + %endif + break; + } + %endfor + } + + ophi[helper_idx] = out - (${bc_wall_grad_order*bc_wall_grad_phase}); + } %endif } -${kernel} void CollideAndPropagate( +${kernel} void FreeEnergyCollideAndPropagateFluid( ${global_ptr} int *map, ${global_ptr} float *dist1_in, ${global_ptr} float *dist1_out, - ${global_ptr} float *dist2_in, - ${global_ptr} float *dist2_out, ${global_ptr} float *gg0m0, ${global_ptr} float *gg1m0, ${kernel_args_1st_moment('ov')} + ${global_ptr} float *gg1laplacian, int options ${scratch_space_if_required()} ${iteration_number_if_required()}) @@ -237,74 +220,83 @@ ${kernel} void CollideAndPropagate( ${load_node_type()} ${guo_density_node_index_shift_intro()} - %if simtype == 'free-energy': - float lap1, grad1[${dim}]; + float lap1, grad1[${dim}]; + if (isWetNode(type)) { + laplacian_and_grad(gg1m0, 1, gi, &lap1, grad1, gx, gy ${', gz' if dim == 3 else ''}); + } - if (isWetNode(type)) { - %if dim == 2: - laplacian_and_grad(gg1m0, 1, gi, &lap1, grad1, gx, gy); - %else: - laplacian_and_grad(gg1m0, 1, gi, &lap1, grad1, gx, gy, gz); - %endif - } - %elif simtype == 'shan-chen': - ${sc_calculate_accel()} - %endif + // Macroscopic quantities for the current cell. + float g0m0, v[${dim}], g1m0; - // cache the distributions in local variables - Dist d0, d1; + // Cache the distributions in local variables. + Dist d0; getDist(&d0, dist1_in, gi ${iteration_number_arg_if_required()}); - getDist(&d1, dist2_in, gi ${iteration_number_arg_if_required()}); - + g1m0 = gg1m0[gi]; ${guo_density_restore_index()} - // macroscopic quantities for the current cell - float g0m0, v[${dim}], g1m0; + getMacro(&d0, ncode, type, orientation, &g0m0, v ${dynamic_val_call_args()}); - %if simtype == 'free-energy': - getMacro(&d0, ncode, type, orientation, &g0m0, v ${dynamic_val_call_args()}); - // TODO(michalj): Is this really needed? - get0thMoment(&d1, type, orientation, &g1m0); - %elif simtype == 'shan-chen': - ${sc_macro_fields()} - %endif - - precollisionBoundaryConditions(&d0, ncode, type, orientation, &g0m0, v); - precollisionBoundaryConditions(&d1, ncode, type, orientation, &g1m0, v); + // Save laplacian and velocity to global memory so that they can be reused + // in the relaxation of the order parameter field. + ovx[gi] = v[0]; + ovy[gi] = v[1]; + ${'ovz[gi] = v[2]' if dim == 3 else ''}; + gg1laplacian[gi] = lap1; - %if simtype == 'shan-chen': - ${relaxate(bgk_args_sc)} - %elif simtype == 'free-energy': - ${relaxate(bgk_args_fe)} + %if phi_needs_rho: + gg0m0[gi] = g0m0; %endif - // FIXME: In order for the half-way bounce back boundary condition to work, a layer of unused - // nodes currently has to be placed behind the wall layer. + precollisionBoundaryConditions(&d0, ncode, type, orientation, &g0m0, v); + ${relaxate(bgk_args_fe, 0)} postcollisionBoundaryConditions(&d0, ncode, type, orientation, &g0m0, v, gi, dist1_out); - postcollisionBoundaryConditions(&d1, ncode, type, orientation, &g1m0, v, gi, dist2_out); - ${guo_density_node_index_shift_final()} + ${check_invalid_values()} + ${save_macro_fields(velocity=False)} + ${propagate('dist1_out', 'd0')} +} - checkInvalidValues(&d0, ${position()}); - checkInvalidValues(&d1, ${position()}); - - // Only save the macroscopic quantities if requested to do so. - if (options & OPTION_SAVE_MACRO_FIELDS) { - %if simtype == 'shan-chen': - if (isWetNode(type)) - %endif - { - ovx[gi] = v[0]; - ovy[gi] = v[1]; - %if dim == 3: - ovz[gi] = v[2]; - %endif - } - } - +${kernel} void FreeEnergyCollideAndPropagateOrderParam( + ${global_ptr} int *map, + ${global_ptr} float *dist1_in, + ${global_ptr} float *dist1_out, + ${global_ptr + ' float *gg0m0,' if phi_needs_rho else ''} + ${global_ptr} float *gg1m0, + ${kernel_args_1st_moment('ov')} + ${global_ptr} float *gg1laplacian, + int options + ${scratch_space_if_required()} + ${iteration_number_if_required()}) +{ + ${local_indices_split()} + ${shared_mem_propagation_vars()} + ${load_node_type()} + ${guo_density_node_index_shift_intro()} + // Cache the distributions in local variables. + Dist d0; + getDist(&d0, dist1_in, gi ${iteration_number_arg_if_required()}); + ${guo_density_restore_index()} + float lap1 = gg1laplacian[gi]; + float g1m0, v[${dim}]; + ${'float g0m0 = gg0m0[gi];' if phi_needs_rho else ''} + + v[0] = ovx[gi]; + v[1] = ovy[gi]; + ${'v[2] = ovz[gi]' if dim == 3 else ''}; + g1m0 = gg1m0[gi]; + + precollisionBoundaryConditions(&d0, ncode, type, orientation, &g1m0, v); + ${relaxate(bgk_args_fe, 1)} + postcollisionBoundaryConditions(&d0, ncode, type, orientation, &g1m0, v, gi, dist1_out); + ${guo_density_node_index_shift_final()} + ${check_invalid_values()} ${propagate('dist1_out', 'd0')} - ${barrier()} - ${propagate('dist2_out', 'd1')} } +%endif ## free-energy + +%if simtype == 'shan-chen': +<%include file="binary_shan_chen.mako"/> +%endif ## shan-chen + <%include file="util_kernels.mako"/> diff --git a/sailfish/templates/binary_shan_chen.mako b/sailfish/templates/binary_shan_chen.mako new file mode 100644 index 00000000..26d9fff2 --- /dev/null +++ b/sailfish/templates/binary_shan_chen.mako @@ -0,0 +1,121 @@ +<%! + from sailfish import sym + import sailfish.node_type as nt + import sympy +%> + +<%namespace file="opencl_compat.mako" import="*" name="opencl_compat"/> +<%namespace file="utils.mako" import="*"/> +<%namespace file="kernel_common.mako" import="*" name="kernel_common"/> +<%namespace file="code_common.mako" import="*"/> +<%namespace file="boundary.mako" import="*" name="boundary"/> +<%namespace file="relaxation.mako" import="*" name="relaxation"/> +<%namespace file="propagation.mako" import="*"/> +<%namespace file="shan_chen.mako" import="*" name="shan_chen"/> + +<%def name="bgk_args_sc(grid_idx)"> + g${grid_idx}m0, v, sca0 + + +${kernel} void ShanChenPrepareMacroFields( + ${global_ptr} int *map, + ${global_ptr} float *dist1_in, + ${global_ptr} float *dist2_in, + ${global_ptr} float *orho0, + ${global_ptr} float *orho1, + ${kernel_args_1st_moment('ov')} + int options + ${scratch_space_if_required()} + ${iteration_number_if_required()}) +{ + ${local_indices_split()} + ${load_node_type()} + + // Do not update the macroscopic fields for nodes which do not + // represent any fluid. + if (!isWetNode(type)) { + return; + } + + // Do not not update the fields for pressure nodes, where by definition + // they are constant. + %for node_type in (nt.NTGuoDensity, nt.NTEquilibriumDensity, nt.NTZouHeDensity): + %if node_type in node_types: + if (is${node_type.__name__}(type)) { return; } + %endif + %endfor + + Dist fi; + float rho0, rho1; + float v[${dim}]; + + // Velocity requires input from both lattices, so we calculate it here so + // that it can just be read out in the collision kernel. + v[0] = 0.0f; + v[1] = 0.0f; + ${'v[2] = 0.0f;' if dim == 3 else ''} + + getDist(&fi, dist1_in, gi ${iteration_number_arg_if_required()}); + get0thMoment(&fi, type, orientation, &rho0); + compute_1st_moment(&fi, v, 0, 1.0f/tau0); + orho0[gi] = rho0; + + // TODO: try moving this line earlier and see what the performance impact is. + getDist(&fi, dist2_in, gi ${iteration_number_arg_if_required()}); + get0thMoment(&fi, type, orientation, &rho1); + compute_1st_moment(&fi, v, 1, 1.0f/tau1); + orho1[gi] = rho1; + + // Velocity becomes a weighted average of the values for invidual components. + float total_rho = rho0 / tau0 + rho1 / tau1; + %for i in range(0, dim): + v[${i}] /= total_rho; + %endfor + + ovx[gi] = v[0]; + ovy[gi] = v[1]; + ${'ovz[gi] = v[2];' if dim == 3 else ''} +} + +%for grid_idx in range(0, 2): +${kernel} void ShanChenCollideAndPropagate${grid_idx}( + ${global_ptr} int *map, + ${global_ptr} float *dist1_in, + ${global_ptr} float *dist1_out, + ${global_ptr} float *gg0m0, + ${global_ptr} float *gg1m0, + ${kernel_args_1st_moment('ov')} + int options + ${scratch_space_if_required()} + ${iteration_number_if_required()}) +{ + ${local_indices_split()} + ${shared_mem_propagation_vars()} + ${load_node_type()} + ${guo_density_node_index_shift_intro()} + float g${grid_idx}m0 = gg${grid_idx}m0[gi]; + ${sc_calculate_force(grid_idx=grid_idx)} + + // Cache the distributions in local variables. + Dist d0; + getDist(&d0, dist1_in, gi ${iteration_number_arg_if_required()}); + + // Macroscopic quantities for the current cell. + float v[${dim}]; + v[0] = ovx[gi]; + v[1] = ovy[gi]; + ${'v[2] = ovz[gi]' if dim == 3 else ''}; + + ${guo_density_restore_index()} + + precollisionBoundaryConditions(&d0, ncode, type, orientation, &g${grid_idx}m0, v); + ${relaxate(bgk_args_sc, grid_idx)} + + // FIXME: In order for the half-way bounce back boundary condition to work, a layer of unused + // nodes currently has to be placed behind the wall layer. + postcollisionBoundaryConditions(&d0, ncode, type, orientation, &g${grid_idx}m0, v, gi, dist1_out); + ${guo_density_node_index_shift_final()} + ${check_invalid_values()} + ${propagate('dist1_out', 'd0')} +} +%endfor diff --git a/sailfish/templates/boundary.mako b/sailfish/templates/boundary.mako index 0c1480a7..a77e143c 100644 --- a/sailfish/templates/boundary.mako +++ b/sailfish/templates/boundary.mako @@ -52,13 +52,14 @@ ${device_func} inline void node_param_get_vector(const int idx, float *out ${device_func} inline float node_param_get_scalar(const int idx ${dynamic_val_args_decl()}) { %if (time_dependence or space_dependence) and symbol_idx_map: if (idx >= ${non_symbolic_idxs}) { - float out; switch (idx) { %for key, val in symbol_idx_map.iteritems(): %if len(val) == 1: - case ${key}: + case ${key}: { + float out; time_dep_param_${key}(&out ${dynamic_val_args()}); return out; + } %endif %endfor default: diff --git a/sailfish/templates/kernel_common.mako b/sailfish/templates/kernel_common.mako index eb61a70f..f5d493bc 100644 --- a/sailfish/templates/kernel_common.mako +++ b/sailfish/templates/kernel_common.mako @@ -1,5 +1,6 @@ <%! - from sailfish import sym + from sailfish import sym + import sailfish.node_type as nt %> <%page args="bgk_args_decl"/> @@ -124,6 +125,30 @@ int orientation = decodeNodeOrientation(ncode); +<%def name="check_invalid_values()"> + ## Grad outflow nodes use invalid values to tag directions lacking distribution + ## data. + if (isWetNode(type) ${'&& !isNTGradFreeflow(type)' if nt.NTGradFreeflow in node_types else ''}) { + checkInvalidValues(&d0, ${position()}); + } + + +<%def name="save_macro_fields(density=True, velocity=True)"> + // Only save the macroscopic quantities if requested to do so. + if ((options & OPTION_SAVE_MACRO_FIELDS) && isWetNode(type) + ## Nodes using the Grad approximation use the velocity from the + ## previous time step to compute the approximated distributions. + ${'|| isNTGradFreeflow(type)' if nt.NTGradFreeflow in node_types else ''}) { + gg0m0[gi] = g0m0 ${' +1.0f' if config.minimize_roundoff else ''}; + + %if not initialization and velocity: + ovx[gi] = v[0]; + ovy[gi] = v[1]; + ${'ovz[gi] = v[2]' if dim == 3 else ''}; + %endif + } + + ## Defines local indices for bulk kernels. ## This is the same as local_indices(), but with proper offsets to skip ## the boundary. @@ -410,10 +435,10 @@ <%namespace file="relaxation.mako" import="*" name="relaxation"/> %endif -<%def name="position_decl()"> - int gx, int gy +<%def name="position_decl(prefix='g')"> + int ${prefix}x, int ${prefix}y %if dim == 3: - , int gz + , int ${prefix}z %endif diff --git a/sailfish/templates/relaxation.mako b/sailfish/templates/relaxation.mako index e0e5356b..cb9d8381 100644 --- a/sailfish/templates/relaxation.mako +++ b/sailfish/templates/relaxation.mako @@ -14,43 +14,44 @@ ${mrt.body()} %endif %if model == 'mrt' and simtype == 'free-energy': -${device_func} inline void FE_MRT_relaxate(${bgk_args_decl()}, -%for i in range(0, len(grids)): - Dist *d${i}, -%endfor +%for grid_idx in range(0, len(grids)): +${device_func} inline void FE_MRT_relaxate${grid_idx}(${bgk_args_decl(grid_idx)}, Dist *d0, int node_type ${dynamic_val_args_decl()}) { - ${bgk_relaxation_preamble()} + ${body_force(force_for_eq.get(grid_idx, grid_idx))} + ${bgk_relaxation_preamble(grid_idx)} + ## The acceleration vector needs to declared if no force was set in the + ## previous call to body_force(). + ${body_force(grid_idx, vector_decl=(force_for_eq.get(grid_idx, -1) is None))} - %for i in range(0, len(grids)): - %for idx in grid.idx_name: - ## Use the BGK approximation for the relaxation of the order parameter field. - %if i == 1: - d${i}->${idx} += (feq${i}.${idx} - d${i}->${idx}) / tau${i}; - %else: - feq${i}.${idx} = d${i}->${idx} - feq${i}.${idx}; - %endif - %endfor + %for idx in grid.idx_name: + ## Use the BGK approximation for the relaxation of the order parameter field. + %if grid_idx == 1: + d0->${idx} += (feq0.${idx} - d0->${idx}) / tau1; + %else: + feq0.${idx} = d0->${idx} - feq0.${idx}; + %endif %endfor - %for dst, src in sym.free_energy_mrt(sim.grid, 'd0', 'feq0'): - ${dst} -= ${sym.make_float(src)}; - %endfor + %if grid_idx == 0: + %for dst, src in sym.free_energy_mrt(sim.grid, 'd0', 'feq0'): + ${dst} -= ${sym.make_float(src)}; + %endfor + %endif - %for i in range(0, len(grids)): - ## Is there a force acting on the current grid? - %if sym_force.needs_accel(i, forces, force_couplings): - ${fluid_velocity(i)}; + ## Is there a force acting on the current grid? + %if sym_force.needs_accel(grid_idx, forces, force_couplings): + ${fluid_velocity(grid_idx)}; - %for val, idx in zip(sym_force.free_energy_external_force(sim, grid_num=i), grid.idx_name): - d${i}->${idx} += ${cex(val)}; - %endfor - %endif - %endfor + %for val, idx in zip(sym_force.free_energy_external_force(sim, grid_num=grid_idx), grid.idx_name): + d0->${idx} += ${cex(val)}; + %endfor + %endif - ${fluid_output_velocity()} + ${fluid_output_velocity(grid_idx)} } +%endfor %endif ## model == mrt && simtype == 'free-energy' %if model == 'elbm': @@ -76,7 +77,7 @@ ${device_func} inline void ELBM_relaxate(${bgk_args_decl()}, Dist* d0 %> float v0[${dim}]; - ${body_force()} + ${body_force(grid_idx=0)} ${fluid_velocity(0)}; ## Local variables used by the equilibrium. @@ -109,7 +110,7 @@ ${device_func} inline void ELBM_relaxate(${bgk_args_decl()}, Dist* d0 d0->${idx} += alpha * fneq0.${idx}; %endfor - ${fluid_output_velocity()} + ${fluid_output_velocity(0)} } %endif ## model == elbm @@ -117,46 +118,42 @@ ${device_func} inline void ELBM_relaxate(${bgk_args_decl()}, Dist* d0 // // Performs the relaxation step in the BGK model given the density rho, // the velocity v and the distribution fi. -// -${device_func} inline void BGK_relaxate(${bgk_args_decl()}, -%for i in range(0, len(grids)): - Dist *d${i}, -%endfor - int node_type, int ncode +%for grid_idx in range(len(grids)): +${device_func} inline void BGK_relaxate${grid_idx}(${bgk_args_decl(grid_idx)}, + Dist *d0, int node_type, int ncode ${dynamic_val_args_decl()}) { - ${bgk_relaxation_preamble()} + ${body_force(grid_idx)} + ${bgk_relaxation_preamble(grid_idx)} - %for i in range(0, len(grids)): - %for idx in grid.idx_name: - d${i}->${idx} += (feq${i}.${idx} - d${i}->${idx}) / tau${i}; - %endfor + %for idx in grid.idx_name: + d0->${idx} += (feq0.${idx} - d0->${idx}) / tau${grid_idx}; + %endfor - %if nt.NTGuoDensity in node_types: - // The total form of the postcollision boundary node distribution value - // with the Guo boundary conditions is as follows: - // - // f_post(O) = f_eq(O) + f(B) - f_eq(B) + omega * (f_eq(B) - f(B)) - // - // where O is the boundary node and B is the fluid node pointed to by the - // boundary node normal vector. The Guo boudary condtiions are implemented - // so that all the standard processing proceeds for node B first, and the - // correction for node O is added as a postcollision boundary condition. - // - // The code below takes care of the -f_eq(B) of the formula. - if (isNTGuoDensity(node_type)) { - %for idx in grid.idx_name: - d${i}->${idx} -= feq${i}.${idx}; - %endfor - } - %endif + %if nt.NTGuoDensity in node_types: + // The total form of the postcollision boundary node distribution value + // with the Guo boundary conditions is as follows: + // + // f_post(O) = f_eq(O) + f(B) - f_eq(B) + omega * (f_eq(B) - f(B)) + // + // where O is the boundary node and B is the fluid node pointed to by the + // boundary node normal vector. The Guo boudary condtiions are implemented + // so that all the standard processing proceeds for node B first, and the + // correction for node O is added as a postcollision boundary condition. + // + // The code below takes care of the -f_eq(B) of the formula. + if (isNTGuoDensity(node_type)) { + %for idx in grid.idx_name: + d0->${idx} -= feq0.${idx}; + %endfor + } + %endif - ## Is there a force acting on the current grid? - %if sym_force.needs_accel(i, forces, force_couplings): - ${fluid_velocity(i)}; - ${apply_body_force(i)}; - %endif - %endfor + ## Is there a force acting on the current grid? + %if sym_force.needs_accel(grid_idx, forces, force_couplings): + ${fluid_velocity(grid_idx)}; + ${apply_body_force(grid_idx)}; + %endif // FIXME: This should be moved to postcollision boundary conditions. %if nt.NTGuoDensity in node_types: @@ -166,35 +163,27 @@ ${device_func} inline void BGK_relaxate(${bgk_args_decl()}, float par_phi = 1.0f; tau0 = tau_a; - %for i, eq in enumerate([f(g, config) for f, g, in zip(equilibria, grids)]): - %for local_var in eq.local_vars: - const float ${cex(local_var.lhs)} = - ${cex(local_var.rhs, rho='par_rho', phi='par_phi')}; - %endfor - %for feq, idx in zip(eq.expression, grid.idx_name): - d${i}->${idx} += ${cex(feq, rho='par_rho', phi='par_phi')}; - %endfor + <% eq = equilibria[grid_idx](grids[grid_idx], config) %> + %for local_var in eq.local_vars: + const float ${cex(local_var.lhs)} = + ${cex(local_var.rhs, rho='par_rho', phi='par_phi')}; + %endfor + %for feq, idx in zip(eq.expression, grid.idx_name): + d0->${idx} += ${cex(feq, rho='par_rho', phi='par_phi')}; %endfor } %endif - ${fluid_output_velocity()} + ${fluid_output_velocity(grid_idx)} } +%endfor %endif -<%def name="_relaxate(bgk_args)"> +<%def name="_relaxate(bgk_args, grid_idx)"> %if model == 'bgk': - BGK_relaxate(${bgk_args()}, -%for i in range(0, len(grids)): - &d${i}, -%endfor - type, ncode ${dynamic_val_call_args()}); + BGK_relaxate${grid_idx}(${bgk_args(grid_idx)}, &d0, type, ncode ${dynamic_val_call_args()}); %elif model == 'mrt' and simtype == 'free-energy': - FE_MRT_relaxate(${bgk_args()}, -%for i in range(0, len(grids)): - &d${i}, -%endfor - type ${dynamic_val_call_args()}); + FE_MRT_relaxate${grid_idx}(${bgk_args(grid_idx)}, &d0, type ${dynamic_val_call_args()}); %elif model == 'elbm': ELBM_relaxate(${bgk_args()}, &d0 ${dynamic_val_call_args()} ${cond(alpha_output, ', options, alpha + gi')}); %else: @@ -202,10 +191,10 @@ ${device_func} inline void BGK_relaxate(${bgk_args_decl()}, %endif -<%def name="relaxate(bgk_args)"> +<%def name="relaxate(bgk_args, grid_idx=0)"> %if relaxation_enabled: if (isWetNode(type)) { - ${_relaxate(bgk_args)} + ${_relaxate(bgk_args, grid_idx)} } %endif diff --git a/sailfish/templates/relaxation_common.mako b/sailfish/templates/relaxation_common.mako index f801def4..70581862 100644 --- a/sailfish/templates/relaxation_common.mako +++ b/sailfish/templates/relaxation_common.mako @@ -5,30 +5,30 @@ <%namespace file="code_common.mako" import="*"/> ## Defines the actual acceleration vectors. -<%def name="body_force()"> +<%def name="body_force(grid_idx, vector_decl=True)"> %if forces is not UNDEFINED and (forces.numeric or forces.symbolic): - // Body force acceleration. - %if forces.symbolic and time_dependence: - float phys_time = get_time_from_iteration(iteration_number); - %endif - %for i in range(0, len(grids)): - %if sym_force.needs_accel(i, forces, {}): - %if not sym_force.needs_coupling_accel(i, force_couplings): - float ea${i}[${dim}]; - %for j in range(0, dim): - ea${i}[${j}] = ${cex(sym_force.body_force_accel(i, j, forces, accel=True))}; - %endfor - %else: - ## If the current grid has a Shan-Chen force acting on it, the acceleration vector - ## is already externally defined in the Shan-Chen code. - %for j in range(0, dim): - %if i in forces.symbolic or i in forces.numeric: - ea${i}[${j}] += ${cex(sym_force.body_force_accel(i, j, forces, accel=True))}; - %endif - %endfor + %if sym_force.needs_accel(grid_idx, forces, {}): + %if forces.symbolic and time_dependence: + float phys_time = get_time_from_iteration(iteration_number); + %endif + // Body force acceleration. + %if not sym_force.needs_coupling_accel(grid_idx, force_couplings): + %if vector_decl: + float ea${grid_idx}[${dim}]; %endif + %for j in range(0, dim): + ea${grid_idx}[${j}] = ${cex(sym_force.body_force_accel(grid_idx, j, forces, accel=True))}; + %endfor + %else: + ## If the current grid has a Shan-Chen force acting on it, the acceleration vector + ## is already externally defined in the Shan-Chen code. + %for j in range(0, dim): + %if grid_idx in forces.symbolic or grid_idx in forces.numeric: + ea${grid_idx}[${j}] += ${cex(sym_force.body_force_accel(grid_idx, j, forces, accel=True))}; + %endif + %endfor %endif - %endfor + %endif %endif @@ -37,7 +37,7 @@ // Guo's method, eqs. 19 and 20 from 10.1103/PhysRevE.65.046308. const float pref = ${cex(sym_force.guo_external_force_pref(grids[i], config, grid_num=i))}; %for val, idx in zip(sym_force.guo_external_force(grid, grid_num=i), grid.idx_name): - d${i}->${idx} += ${cex(val)}; + d0->${idx} += ${cex(val)}; %endfor } @@ -46,7 +46,7 @@ { // Exact difference method. %for feq_shifted, idx in zip(sym_force.edm_shift_velocity(equilibria[i](grids[i], config).expression), grid.idx_name): - d${i}->${idx} += ${cex(feq_shifted)} - feq${i}.${idx}; + d0->${idx} += ${cex(feq_shifted)} - feq${i}.${idx}; %endfor } @@ -57,7 +57,7 @@ <%def name="apply_body_force(i)"> %if simtype == 'free-energy': %for val, idx in zip(sym_force.free_energy_external_force(sim, grid_num=i), grid.idx_name): - d${i}->${idx} += ${cex(val)}; + d0->${idx} += ${cex(val)}; %endfor %else: %if force_implementation == 'guo': @@ -104,10 +104,10 @@ %endif -<%def name="update_relaxation_time()"> +<%def name="update_relaxation_time(grid_idx)"> ## In models where the relaxation time is constant everywhere, tau0 is a global ## constant and does not need to be declared here. - %if simtype == 'free-energy': + %if simtype == 'free-energy' and grid_idx == 0: // Linear interpolation of relaxation time. float tau0 = tau_b + (phi + 1.0f) * (tau_a - tau_b) / 2.0f; if (phi < -1.0f) { @@ -119,60 +119,54 @@ %if subgrid == 'les-smagorinsky': // Compute relaxation time using the standard viscosity-relaxation time relation. - %for i in range(0, len(grids)): - float tau${i} = 0.5f + 3.0f * visc; - %endfor + float tau0 = 0.5f + 3.0f * visc; - ## FIXME: This will not work properly for multifluid models. + // TODO(michalj): Fix this for multifluid models. // Modify the relaxation time proportionally to the modulus of the local strain rate tensor. // The 2nd order tensor formed from the equilibrium distributions is rho / 3 * \delta_{ab} + rho u_a u_b { float tmp, strain; - %for i in range(0, len(grids)): - strain = 0.0f; - - // Off-diagonal components, count twice for symmetry reasons. - %for a in range(0, dim): - %for b in range(a+1, dim): - tmp = ${cex(sym.ex_flux(grid, 'd%d' % i, a, b, config), pointers=True)} - - ${cex(sym.S.rho * grid.v[a] * grid.v[b])}; - strain += 2.0f * tmp * tmp; - %endfor - %endfor + strain = 0.0f; - // Diagonal components. - %for a in range(0, dim): - tmp = ${cex(sym.ex_flux(grid, 'd%d' % i, a, a, config), pointers=True)} - - ${cex(sym.S.rho * (grid.v[a] * grid.v[b] + grid.cssq))}; - strain += tmp * tmp; + // Off-diagonal components, count twice for symmetry reasons. + %for a in range(0, dim): + %for b in range(a+1, dim): + tmp = ${cex(sym.ex_flux(grid, 'd0', a, b, config), pointers=True)} - + ${cex(sym.S.rho * grid.v[a] * grid.v[b])}; + strain += 2.0f * tmp * tmp; %endfor + %endfor - // Form of the relaxation time correction as in comp-gas/9401004v1. - tau${i} += (sqrtf(visc*visc + 18.0f * ${cex(smagorinsky_const**2)} * - sqrtf(strain)) - visc) / 2.0f; + // Diagonal components. + %for a in range(0, dim): + tmp = ${cex(sym.ex_flux(grid, 'd0', a, a, config), pointers=True)} - + ${cex(sym.S.rho * (grid.v[a] * grid.v[b] + grid.cssq))}; + strain += tmp * tmp; %endfor + + // Form of the relaxation time correction as in comp-gas/9401004v1. + tau0 += (sqrtf(visc*visc + 18.0f * ${cex(smagorinsky_const**2)} * + sqrtf(strain)) - visc) / 2.0f; } %endif ## Code common to all BGK-like relaxation models. -<%def name="bgk_relaxation_preamble()"> +<%def name="bgk_relaxation_preamble(grid_idx=0)"> float v0[${dim}]; - ${body_force()} - %for i, eq in enumerate([f(g, config) for f, g in zip(equilibria, grids)]): - Dist feq${i}; - ${fluid_velocity(i, equilibrium=True)}; + <% eq = equilibria[grid_idx](grids[grid_idx], config) %> + Dist feq0; + ${fluid_velocity(grid_idx, equilibrium=True)}; - %for local_var in eq.local_vars: - float ${cex(local_var.lhs)} = ${cex(local_var.rhs)}; - %endfor + %for local_var in eq.local_vars: + float ${cex(local_var.lhs)} = ${cex(local_var.rhs)}; + %endfor - %for feq, idx in zip(eq.expression, grid.idx_name): - feq${i}.${idx} = ${cex(feq)}; - %endfor + %for feq, idx in zip(eq.expression, grid.idx_name): + feq0.${idx} = ${cex(feq)}; %endfor - ${update_relaxation_time()} + ${update_relaxation_time(grid_idx)} diff --git a/sailfish/templates/shan_chen.mako b/sailfish/templates/shan_chen.mako index 3ac59ccd..7931f241 100644 --- a/sailfish/templates/shan_chen.mako +++ b/sailfish/templates/shan_chen.mako @@ -4,60 +4,28 @@ <%namespace file="utils.mako" import="get_field_loc,get_field_off"/> <%namespace file="code_common.mako" import="cex"/> +<%namespace file="kernel_common.mako" import="*"/> -<%def name="sc_calculate_accel()"> -## -## Declare and evaluate the Shan-Chen accelerations. -## - %for x in set(sum(force_couplings.keys(), ())): - float sca${x}[${dim}]; - %endfor +## Declares and evaluate the Shan-Chen forces. +<%def name="sc_calculate_force(grid_idx=0)"> + float sca0[${dim}]; if (isWetNode(type)) { %for dists, coupling_const in force_couplings.iteritems(): - // Interaction force between two components. - %if dists[0] != dists[1]: - %if dim == 2: - shan_chen_force(gi, gg${dists[0]}m0, gg${dists[1]}m0, - ${coupling_const}, sca${dists[0]}, sca${dists[1]}, gx, gy); - %else: - shan_chen_force(gi, gg${dists[0]}m0, gg${dists[1]}m0, - ${coupling_const}, sca${dists[0]}, sca${dists[1]}, gx, gy, gz); - %endif - // Self-interaction force of a single component. - %else: - %if dim == 2: - shan_chen_force_self(gi, gg${dists[0]}m0, ${coupling_const}, sca${dists[0]}, gx, gy); - %else: - shan_chen_force_self(gi, gg${dists[0]}m0, ${coupling_const}, sca${dists[0]}, gx, gy, gz); - %endif + %if dists[0] == grid_idx: + shan_chen_force(gi, g${grid_idx}m0, gg${dists[1]}m0, + ${coupling_const}, sca0, ${position()}); + %elif dists[1] == grid_idx: + shan_chen_force(gi, g${grid_idx}m0, gg${dists[0]}m0, + ${coupling_const}, sca0, ${position()}); %endif %endfor - } - - -<%def name="sc_macro_fields()"> - // Calculates the density and velocity for the Shan-Chen coupled fields. - // Density and velocity become weighted averages of the values for invidual components. - float total_dens; - %for i, x in enumerate(set(sum(force_couplings.keys(), ()))): - get0thMoment(&d${x}, type, orientation, &g${x}m0); - compute_1st_moment(&d${x}, v, ${i}, 1.0f/tau${x}); - %endfor - - total_dens = 0.0f; - %for x in set(sum(force_couplings.keys(), ())): - total_dens += g${x}m0 / tau${x}; - %endfor - - // Convert momentum and force into velocity and acceleration. - %for i in range(0, dim): - %for x in set(sum(force_couplings.keys(), ())): - sca${x}[${i}] /= g${x}m0; + // Convert momentum and force into velocity and acceleration. + %for i in range(0, dim): + sca0[${i}] /= g${grid_idx}m0; %endfor - v[${i}] /= total_dens; - %endfor + } // Calculates the Shan-Chan pseudopotential. @@ -67,80 +35,24 @@ ${device_func} inline float sc_ppot(${global_ptr} float *field, int gi) return ${cex(sym.SHAN_CHEN_POTENTIALS[sc_potential]('lfield'))}; } -// Calculates the Shan-Chen force between a single fluid component (self-interaction). -// The form of the interaction is the same as that of a force between two components (see below). -${device_func} inline void shan_chen_force_self(int i, ${global_ptr} float *f1, - float cc, float *a1, int x, int y -%if dim == 3: - , int z -%endif -) -{ - float t1; - - %for i in range(0, dim): - a1[${i}] = 0.0f; - %endfor - - %if block.envelope_size != 0: - int off; - %endif - - int gi; // global index - - %for i, ve in enumerate(grid.basis): - %if ve.dot(ve) != 0.0: - // ${ve} - %if block.envelope_size == 0: - ${get_field_loc(*ve)}; - %else: - ${get_field_off(*ve)} - gi = i + off; - %endif - - t1 = sc_ppot(f1, gi); - - %if ve[0] != 0: - a1[0] += t1 * ${ve[0] * grid.weights[i]}; - %endif - %if ve[1] != 0: - a1[1] += t1 * ${ve[1] * grid.weights[i]}; - %endif - %if dim == 3 and ve[2] != 0: - a1[2] += t1 * ${ve[2] * grid.weights[i]}; - %endif - %endif - %endfor - - // Local node -- no offset. - t1 = sc_ppot(f1, i); - - %for i in range(0, dim): - a1[${i}] *= t1 * cc; - %endfor -} - // Calculates the Shan-Chen force between two fluid components. // // F = -G * \phi_A(x) \sum_i w_i e_i \phi_B(x + e_i) // // i: global node index +// rho: density at the current node +// field: (density) fielf of the other fluid component // f1, f2: fields // cc: coupling constant -// a1, a2: Shan-Chen accelerations (output variables) +// force: Shan-Chen force (output variable) // x, y, [z]: position of the node -${device_func} inline void shan_chen_force(int i, ${global_ptr} float *f1, ${global_ptr} float *f2, -float cc, float *a1, float *a2, int x, int y -%if dim == 3: - , int z -%endif -) +${device_func} inline void shan_chen_force(int i, float rho, ${global_ptr} float *field, +float cc, float *force, ${position_decl(prefix='')}) { - float t1, t2; + float psi; %for i in range(0, dim): - a1[${i}] = 0.0f; - a2[${i}] = 0.0f; + force[${i}] = 0.0f; %endfor %if block.envelope_size != 0: @@ -159,30 +71,20 @@ float cc, float *a1, float *a2, int x, int y gi = i + off; %endif - t1 = sc_ppot(f1, gi); - t2 = sc_ppot(f2, gi); + psi = sc_ppot(field, gi); - %if ve[0] != 0: - a1[0] += t2 * ${ve[0] * grid.weights[i]}; - a2[0] += t1 * ${ve[0] * grid.weights[i]}; - %endif - %if ve[1] != 0: - a1[1] += t2 * ${ve[1] * grid.weights[i]}; - a2[1] += t1 * ${ve[1] * grid.weights[i]}; - %endif - %if dim == 3 and ve[2] != 0: - a1[2] += t2 * ${ve[2] * grid.weights[i]}; - a2[2] += t1 * ${ve[2] * grid.weights[i]}; - %endif + %for j, component in enumerate(ve): + %if component != 0: + force[${j}] += psi * ${component * grid.weights[i]}; + %endif + %endfor %endif %endfor // Local node -- no offset. - t1 = sc_ppot(f1, i); - t2 = sc_ppot(f2, i); + psi = ${cex(sym.SHAN_CHEN_POTENTIALS[sc_potential]('rho'))}; %for i in range(0, dim): - a1[${i}] *= - t1 * cc; - a2[${i}] *= - t2 * cc; + force[${i}] *= - psi * cc; %endfor } diff --git a/sailfish/templates/single_fluid.mako b/sailfish/templates/single_fluid.mako index c6158ddc..c527cc30 100644 --- a/sailfish/templates/single_fluid.mako +++ b/sailfish/templates/single_fluid.mako @@ -7,14 +7,14 @@ ${const_var} float gravity = ${gravity}f; %endif -<%def name="bgk_args_decl()"> +<%def name="bgk_args_decl(grid_idx=0)"> float rho, float *iv0 %if simtype == 'shan-chen': , float *ea0 %endif -<%def name="bgk_args()"> +<%def name="bgk_args(grid_idx=0)"> g0m0, v %if simtype == 'shan-chen': , sca0 @@ -174,17 +174,12 @@ ${kernel} void CollideAndPropagate( ovx, ovy ${', ovz' if dim == 3 else ''}, gg0m0 ${scratch_space_arg_if_required()}); - %if simtype == 'shan-chen': - ${sc_calculate_accel()} - %endif - // Macroscopic quantities for the current cell float g0m0, v[${dim}]; + getMacro(&d0, ncode, type, orientation, &g0m0, v ${dynamic_val_call_args()}); %if simtype == 'shan-chen': - ${sc_macro_fields()} - %else: - getMacro(&d0, ncode, type, orientation, &g0m0, v ${dynamic_val_call_args()}); + ${sc_calculate_force()} %endif precollisionBoundaryConditions(&d0, ncode, type, orientation, &g0m0, v); @@ -198,29 +193,8 @@ ${kernel} void CollideAndPropagate( ${relaxate(bgk_args)} postcollisionBoundaryConditions(&d0, ncode, type, orientation, &g0m0, v, gi, dist_out ${scratch_space_arg_if_required()}); - - ## Grad outflow nodes use invalid values to tag directions lacking distribution - ## data. - if (isWetNode(type) ${'&& !isNTGradFreeflow(type)' if nt.NTGradFreeflow in node_types else ''}) { - checkInvalidValues(&d0, ${position()}); - } - - // Only save the macroscopic quantities if requested to do so. - if ((options & OPTION_SAVE_MACRO_FIELDS) - ## Nodes using the Grad approximation use the velocity from the - ## previous time step to compute the approximated distributions. - ${'|| isNTGradFreeflow(type)' if nt.NTGradFreeflow in node_types else ''}) { - gg0m0[gi] = g0m0 ${' +1.0f' if config.minimize_roundoff else ''}; - - %if not initialization: - ovx[gi] = v[0]; - ovy[gi] = v[1]; - %if dim == 3: - ovz[gi] = v[2]; - %endif - %endif - } - + ${check_invalid_values()} + ${save_macro_fields()} ${propagate('dist_out', 'd0')} }