Skip to content

Commit

Permalink
Split Shan-Chen and Free-Energy code into two kernels, one per lattice.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjanusz committed Dec 29, 2012
1 parent b16078c commit 98ac2f7
Show file tree
Hide file tree
Showing 16 changed files with 702 additions and 587 deletions.
2 changes: 1 addition & 1 deletion examples/sc_drop.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def update_defaults(cls, defaults):
defaults.update({
'lat_nx': 256,
'lat_ny': 256,
'G': 5.0,
'G': -5.0,
'visc': 1.0 / 6.0,
'periodic_x': True,
'periodic_y': True,
Expand Down
2 changes: 1 addition & 1 deletion examples/sc_phase_separation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def update_defaults(cls, defaults):
defaults.update({
'lat_nx': 256,
'lat_ny': 256,
'G': 5.0,
'G': -5.0,
'visc': 1.0 / 6.0,
'periodic_x': True,
'periodic_y': True,
Expand Down
1 change: 1 addition & 0 deletions sailfish/backend_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def get_kernel(self, prog, name, block, args, args_format, shared=0,

if needs_iteration:
args_format += 'i'
args = list(args)
args.append(0)
self._iteration_kernels.append(kern)

Expand Down
13 changes: 12 additions & 1 deletion sailfish/lb_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

FieldPair = namedtuple('FieldPair', 'abstract buffer')
ForcePair = namedtuple('ForcePair', 'numeric symbolic')
KernelPair = namedtuple('KernelPair', 'primary secondary')

class LBSim(object):
"""Describes a specific type of a lattice Boltzmann simulation."""
Expand Down Expand Up @@ -173,7 +174,14 @@ def after_step(self, runner):
pass

def get_compute_kernels(self, runner, full_output, bulk):
    """Returns the GPU kernels to execute for a single simulation step.

    Base implementation: returns an empty kernel pair; concrete simulation
    classes override this to provide their compute kernels.

    :param runner: SubdomainRunner object
    :param full_output: if True, returns kernels that prepare fields for
        visualization or saving into a file
    :param bulk: if True, returns kernels that process the bulk domain,
        otherwise returns kernels that process the subdomain boundary
    """
    # NOTE(review): the scraped diff left a stray `return []` (the removed
    # old body) before the docstring; only the KernelPair return is kept.
    return KernelPair(None, None)

def get_pbc_kernels(self, runner):
return []
Expand Down Expand Up @@ -258,6 +266,9 @@ def use_force_for_equilibrium(self, force_grid, target_grid):
To disable acceleration on a grid, pass an invalid grid ID in force_grid
(e.g. None or -1).
Note: this is currently only supported in the free-energy MRT model.
The force reassignment will be silently ignored in other models.
:param force_grid: grid ID from which the acceleration will be used
:param target_grid: grid ID on which the acceleration will act
"""
Expand Down
255 changes: 185 additions & 70 deletions sailfish/lb_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from functools import partial
import numpy as np
from sailfish import subdomain_runner, sym, sym_equilibrium
from sailfish.lb_base import LBSim, LBForcedSim, ScalarField, VectorField
from sailfish.lb_base import LBSim, LBForcedSim, ScalarField, VectorField, KernelPair
from sailfish.lb_single import MacroKernels


Expand Down Expand Up @@ -92,74 +92,6 @@ def get_pbc_kernels(self, runner):
ret = MacroKernels(macro=macro_kernels, distributions=dist_kernels)
return ret

def get_compute_kernels(self, runner, full_output, bulk):
    """Builds the per-step compute kernels for the binary fluid simulation.

    :param runner: SubdomainRunner object used to access GPU buffers and
        to instantiate kernels
    :param full_output: if True, the kernels also prepare fields for
        visualization or saving into a file (bit 0 of the options word)
    :param bulk: if True, the kernels process the bulk domain, otherwise
        the subdomain boundary (bit 1 of the options word)

    Returns pairs of (macro-field preparation kernel, collide-and-propagate
    kernel), one pair for the primary step and one for the secondary step.
    """
    gpu_rho = runner.gpu_field(self.rho)
    gpu_phi = runner.gpu_field(self.phi)
    gpu_v = runner.gpu_field(self.v)
    gpu_map = runner.gpu_geo_map()

    # Two distribution buffers (slots 0/1) per lattice (grids 0/1), so the
    # kernels can read one buffer and write the other.
    gpu_dist1a = runner.gpu_dist(0, 0)
    gpu_dist1b = runner.gpu_dist(0, 1)
    gpu_dist2a = runner.gpu_dist(1, 0)
    gpu_dist2b = runner.gpu_dist(1, 1)

    # Bitmask forwarded to the kernels: bit 0 = full output, bit 1 = bulk.
    options = 0
    if full_output:
        options |= 1
    if bulk:
        options |= 2

    options = np.uint32(options)
    # args1: primary step (read a, write b); args2: secondary step with the
    # distribution buffers swapped.
    args1 = [gpu_map, gpu_dist1a, gpu_dist1b, gpu_dist2a, gpu_dist2b,
            gpu_rho, gpu_phi] + gpu_v + [options]
    args2 = [gpu_map, gpu_dist1b, gpu_dist1a, gpu_dist2b, gpu_dist2a,
            gpu_rho, gpu_phi] + gpu_v + [options]

    macro_args1 = [gpu_map, gpu_dist1a, gpu_dist2a, gpu_rho, gpu_phi,
            options]
    macro_args2 = [gpu_map, gpu_dist1b, gpu_dist2b, gpu_rho, gpu_phi,
            options]

    # Argument signature strings: one 'P' per pointer argument and a
    # trailing 'i' for the uint32 options word.
    args_signature = 'P' * (len(args1) - 1) + 'i'
    macro_signature = 'P' * (len(macro_args1) - 1) + 'i'

    if runner.gpu_scratch_space is not None:
        # Optional scratch buffer goes last in every argument list.
        macro_args1.append(runner.gpu_scratch_space)
        macro_args2.append(runner.gpu_scratch_space)
        macro_signature += 'P'

        args1.append(runner.gpu_scratch_space)
        args2.append(runner.gpu_scratch_space)
        args_signature += 'P'

    macro_kernels = [
        runner.get_kernel('PrepareMacroFields', macro_args1,
            macro_signature,
            needs_iteration=self.config.needs_iteration_num)]

    if self.config.access_pattern == 'AB':
        macro_kernels.append(
            runner.get_kernel('PrepareMacroFields', macro_args2,
                macro_signature,
                needs_iteration=self.config.needs_iteration_num))
    else:
        # Non-AB access pattern uses a single buffer set, so the same
        # kernel object is reused for both steps.
        macro_kernels.append(macro_kernels[-1])

    sim_kernels = [
        runner.get_kernel('CollideAndPropagate', args1,
            args_signature,
            needs_iteration=self.config.needs_iteration_num)]

    if self.config.access_pattern == 'AB':
        sim_kernels.append(
            runner.get_kernel('CollideAndPropagate', args2,
                args_signature,
                needs_iteration=self.config.needs_iteration_num))
    else:
        sim_kernels.append(sim_kernels[-1])

    # Pair up macro and simulation kernels: (primary, primary),
    # (secondary, secondary).
    return zip(macro_kernels, sim_kernels)

def initial_conditions(self, runner):
gpu_rho = runner.gpu_field(self.rho)
gpu_phi = runner.gpu_field(self.phi)
Expand Down Expand Up @@ -203,7 +135,8 @@ def constants(self):

@classmethod
def fields(cls):
    """Returns the macroscopic fields used by this model.

    phi requires nearest-neighbor access (need_nn=True); the Laplacian of
    the order parameter is stored in a dedicated scalar field.
    """
    # NOTE(review): the scraped diff interleaved the removed one-line
    # return with the new one; only the coherent new version (which adds
    # the phi_laplacian field) is kept.
    return [ScalarField('rho'), ScalarField('phi', need_nn=True),
            VectorField('v'), ScalarField('phi_laplacian')]

@classmethod
def add_options(cls, group, dim):
Expand Down Expand Up @@ -318,6 +251,101 @@ def _prepare_symbols(self):
self.S.wxx.append(-Rational(1, 24))
self.S.wyy.append(-Rational(1, 24))

def get_compute_kernels(self, runner, full_output, bulk):
    """Builds the per-step compute kernels for the free-energy binary model.

    The collide-and-propagate step is split into two kernels, one per
    lattice: FreeEnergyCollideAndPropagateFluid (density lattice) and
    FreeEnergyCollideAndPropagateOrderParam (order-parameter lattice).

    :param runner: SubdomainRunner object used to access GPU buffers and
        to instantiate kernels
    :param full_output: if True, the kernels also prepare fields for
        visualization or saving into a file (bit 0 of the options word)
    :param bulk: if True, the kernels process the bulk domain, otherwise
        the subdomain boundary (bit 1 of the options word)
    """
    gpu_rho = runner.gpu_field(self.rho)
    gpu_phi = runner.gpu_field(self.phi)
    gpu_lap = runner.gpu_field(self.phi_laplacian)
    gpu_v = runner.gpu_field(self.v)
    gpu_map = runner.gpu_geo_map()

    # Two distribution buffers (slots 0/1) per lattice (grids 0/1).
    gpu_dist1a = runner.gpu_dist(0, 0)
    gpu_dist1b = runner.gpu_dist(0, 1)
    gpu_dist2a = runner.gpu_dist(1, 0)
    gpu_dist2b = runner.gpu_dist(1, 1)

    # Bitmask forwarded to the kernels: bit 0 = full output, bit 1 = bulk.
    options = 0
    if full_output:
        options |= 1
    if bulk:
        options |= 2

    # When the equilibrium force for grid 1 has been remapped to grid 0
    # (see use_force_for_equilibrium), the order-parameter kernel also
    # receives the density field — presumably so it can evaluate the
    # acceleration term; TODO(review): confirm against the kernel code.
    if hasattr(self, '_force_term_for_eq') and self._force_term_for_eq.get(1) == 0:
        phi_args = [gpu_rho, gpu_phi]
    else:
        phi_args = [gpu_phi]

    options = np.uint32(options)
    # Primary.
    args1a = ([gpu_map, gpu_dist1a, gpu_dist1b, gpu_rho, gpu_phi] +
        gpu_v + [gpu_lap, options])
    args1b = ([gpu_map, gpu_dist2a, gpu_dist2b] + phi_args +
        gpu_v + [gpu_lap, options])
    # Secondary.
    args2a = ([gpu_map, gpu_dist1b, gpu_dist1a, gpu_rho, gpu_phi] +
        gpu_v + [gpu_lap, options])
    args2b = ([gpu_map, gpu_dist2b, gpu_dist2a] + phi_args +
        gpu_v + [gpu_lap, options])

    macro_args1 = [gpu_map, gpu_dist1a, gpu_dist2a, gpu_rho, gpu_phi,
        options]
    macro_args2 = [gpu_map, gpu_dist1b, gpu_dist2b, gpu_rho, gpu_phi,
        options]

    # One 'P' per pointer argument, trailing 'i' for the uint32 options.
    args_a_signature = 'P' * (len(args1a) - 1) + 'i'
    args_b_signature = 'P' * (len(args1b) - 1) + 'i'
    macro_signature = 'P' * (len(macro_args1) - 1) + 'i'

    if runner.gpu_scratch_space is not None:
        # Optional scratch buffer goes last in every argument list.
        macro_args1.append(runner.gpu_scratch_space)
        macro_args2.append(runner.gpu_scratch_space)
        macro_signature += 'P'

        args1a.append(runner.gpu_scratch_space)
        args2a.append(runner.gpu_scratch_space)
        args1b.append(runner.gpu_scratch_space)
        args2b.append(runner.gpu_scratch_space)
        args_a_signature += 'P'
        args_b_signature += 'P'

    macro = runner.get_kernel('FreeEnergyPrepareMacroFields', macro_args1,
        macro_signature,
        needs_iteration=self.config.needs_iteration_num)

    if self.config.access_pattern == 'AB':
        macro_secondary = runner.get_kernel('FreeEnergyPrepareMacroFields',
            macro_args2,
            macro_signature,
            needs_iteration=self.config.needs_iteration_num)
        macro_pair = KernelPair(macro, macro_secondary)
    else:
        # Single buffer set: the same kernel serves both steps.
        macro_pair = KernelPair(macro, macro)

    # Note: these two kernels need to be executed in order.
    primary = [
        runner.get_kernel('FreeEnergyCollideAndPropagateFluid', args1a,
            args_a_signature,
            needs_iteration=self.config.needs_iteration_num),
        runner.get_kernel('FreeEnergyCollideAndPropagateOrderParam', args1b,
            args_b_signature,
            needs_iteration=self.config.needs_iteration_num)
    ]

    if self.config.access_pattern == 'AB':
        secondary = [
            runner.get_kernel('FreeEnergyCollideAndPropagateFluid', args2a,
                args_a_signature,
                needs_iteration=self.config.needs_iteration_num),
            runner.get_kernel('FreeEnergyCollideAndPropagateOrderParam',
                args2b,
                args_b_signature,
                needs_iteration=self.config.needs_iteration_num)
        ]
        sim_pair = KernelPair(primary, secondary)
    else:
        sim_pair = KernelPair(primary, primary)

    # Yields (macro kernel, [fluid kernel, order-param kernel]) for the
    # primary and then the secondary step.
    return zip(macro_pair, sim_pair)


class LBBinaryFluidShanChen(LBBinaryFluidBase, LBForcedSim):
"""Binary fluid mixture using the Shan-Chen model."""
Expand Down Expand Up @@ -359,3 +387,90 @@ def update_context(self, ctx):
ctx['sc_potential'] = self.config.sc_potential
ctx['tau'] = sym.relaxation_time(self.config.visc)
ctx['visc'] = self.config.visc

def get_compute_kernels(self, runner, full_output, bulk):
    """Builds the per-step compute kernels for the Shan-Chen binary model.

    The collide-and-propagate step is split into two kernels, one per
    lattice: ShanChenCollideAndPropagate0 and ShanChenCollideAndPropagate1.

    :param runner: SubdomainRunner object used to access GPU buffers and
        to instantiate kernels
    :param full_output: if True, the kernels also prepare fields for
        visualization or saving into a file (bit 0 of the options word)
    :param bulk: if True, the kernels process the bulk domain, otherwise
        the subdomain boundary (bit 1 of the options word)
    """
    gpu_rho = runner.gpu_field(self.rho)
    gpu_phi = runner.gpu_field(self.phi)
    gpu_v = runner.gpu_field(self.v)
    gpu_map = runner.gpu_geo_map()

    # Two distribution buffers (slots 0/1) per lattice (grids 0/1).
    gpu_dist1a = runner.gpu_dist(0, 0)
    gpu_dist1b = runner.gpu_dist(0, 1)
    gpu_dist2a = runner.gpu_dist(1, 0)
    gpu_dist2b = runner.gpu_dist(1, 1)

    # Bitmask forwarded to the kernels: bit 0 = full output, bit 1 = bulk.
    options = 0
    if full_output:
        options |= 1
    if bulk:
        options |= 2

    options = np.uint32(options)
    # Primary.
    args1a = ([gpu_map, gpu_dist1a, gpu_dist1b, gpu_rho, gpu_phi] +
        gpu_v + [options])
    args1b = ([gpu_map, gpu_dist2a, gpu_dist2b, gpu_rho, gpu_phi] +
        gpu_v + [options])
    # Secondary.
    args2a = ([gpu_map, gpu_dist1b, gpu_dist1a, gpu_rho, gpu_phi] +
        gpu_v + [options])
    args2b = ([gpu_map, gpu_dist2b, gpu_dist2a, gpu_rho, gpu_phi] +
        gpu_v + [options])

    # Unlike the free-energy model, the macro-field kernel here also
    # receives the velocity field.
    macro_args1 = ([gpu_map, gpu_dist1a, gpu_dist2a, gpu_rho, gpu_phi] +
        gpu_v + [options])
    macro_args2 = ([gpu_map, gpu_dist1b, gpu_dist2b, gpu_rho, gpu_phi] +
        gpu_v + [options])

    # One 'P' per pointer argument, trailing 'i' for the uint32 options.
    args_a_signature = 'P' * (len(args1a) - 1) + 'i'
    args_b_signature = 'P' * (len(args1b) - 1) + 'i'
    macro_signature = 'P' * (len(macro_args1) - 1) + 'i'

    if runner.gpu_scratch_space is not None:
        # Optional scratch buffer goes last in every argument list.
        macro_args1.append(runner.gpu_scratch_space)
        macro_args2.append(runner.gpu_scratch_space)
        macro_signature += 'P'

        args1a.append(runner.gpu_scratch_space)
        args2a.append(runner.gpu_scratch_space)
        args1b.append(runner.gpu_scratch_space)
        args2b.append(runner.gpu_scratch_space)
        args_a_signature += 'P'
        args_b_signature += 'P'

    macro = runner.get_kernel('ShanChenPrepareMacroFields', macro_args1,
        macro_signature,
        needs_iteration=self.config.needs_iteration_num)

    if self.config.access_pattern == 'AB':
        macro_secondary = runner.get_kernel('ShanChenPrepareMacroFields', macro_args2,
            macro_signature,
            needs_iteration=self.config.needs_iteration_num)
        macro_pair = KernelPair(macro, macro_secondary)
    else:
        # Single buffer set: the same kernel serves both steps.
        macro_pair = KernelPair(macro, macro)

    # TODO(michalj): These kernels can actually run in parallel.
    primary = [
        runner.get_kernel('ShanChenCollideAndPropagate0', args1a,
            args_a_signature,
            needs_iteration=self.config.needs_iteration_num),
        runner.get_kernel('ShanChenCollideAndPropagate1', args1b,
            args_b_signature,
            needs_iteration=self.config.needs_iteration_num)
    ]

    if self.config.access_pattern == 'AB':
        secondary = [
            runner.get_kernel('ShanChenCollideAndPropagate0', args2a,
                args_a_signature,
                needs_iteration=self.config.needs_iteration_num),
            runner.get_kernel('ShanChenCollideAndPropagate1', args2b,
                args_b_signature,
                needs_iteration=self.config.needs_iteration_num)
        ]
        sim_pair = KernelPair(primary, secondary)
    else:
        sim_pair = KernelPair(primary, primary)

    # Yields (macro kernel, [grid-0 kernel, grid-1 kernel]) for the
    # primary and then the secondary step.
    return zip(macro_pair, sim_pair)
28 changes: 9 additions & 19 deletions sailfish/lb_single.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np

from sailfish import subdomain_runner, sym, sym_equilibrium
from sailfish.lb_base import LBSim, LBForcedSim, ScalarField, VectorField
from sailfish.lb_base import LBSim, LBForcedSim, ScalarField, VectorField, KernelPair


MacroKernels = namedtuple('MacroKernels', 'distributions macro')
Expand Down Expand Up @@ -71,13 +71,6 @@ def initial_conditions(self, runner):
runner.exec_kernel('SetInitialConditions', args2, 'P'*len(args2))

def get_compute_kernels(self, runner, full_output, bulk):
"""
:param runner: SubdomainRunner object
:param full_output: if True, returns kernels that prepare fields for
visualization or saving into a file
:param bulk: if True, returns kernels that process the bulk domain,
otherwise returns kernels that process the subdomain boundary
"""
gpu_rho = runner.gpu_field(self.rho)
gpu_v = runner.gpu_field(self.v)
gpu_dist1a = runner.gpu_dist(0, 0)
Expand Down Expand Up @@ -109,21 +102,18 @@ def get_compute_kernels(self, runner, full_output, bulk):
args2.append(runner.gpu_field(self.alpha))
signature += 'P'

kernels = []

kernels.append(runner.get_kernel(
'CollideAndPropagate', args1, signature,
needs_iteration=self.config.needs_iteration_num))
cnp_primary = runner.get_kernel(
'CollideAndPropagate', args1, signature,
needs_iteration=self.config.needs_iteration_num)

if self.config.access_pattern == 'AB':
secondary_args = args2 if self.config.access_pattern == 'AB' else args1
kernels.append(runner.get_kernel(
'CollideAndPropagate', secondary_args, signature,
needs_iteration=self.config.needs_iteration_num))
cnp_secondary = runner.get_kernel(
'CollideAndPropagate', secondary_args, signature,
needs_iteration=self.config.needs_iteration_num)
return KernelPair([cnp_primary], [cnp_secondary])
else:
kernels.append(kernels[-1])

return kernels
return KernelPair([cnp_primary], [cnp_primary])

def get_pbc_kernels(self, runner):
gpu_dist1a = runner.gpu_dist(0, 0)
Expand Down
Loading

0 comments on commit 98ac2f7

Please sign in to comment.