Skip to content
8 changes: 6 additions & 2 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from devito.operator.operator import rcompile
from devito.passes import stream_dimensions
from devito.passes.clusters import (
Lift, blocking, buffering, cire, cse, factorize, fission, fuse, optimize_hyperplanes,
optimize_pows
Lift, apply_par_tiles, blocking, buffering, cire, cse, factorize, fission, fuse,
optimize_hyperplanes, optimize_pows
)
from devito.passes.equations import collect_derivatives
from devito.passes.iet import (
Expand Down Expand Up @@ -67,6 +67,7 @@ def _normalize_kwargs(cls, **kwargs):
reduce=oo.pop('par-tile-reduce', None))

# CIRE
o['cire-block-temps'] = oo.pop('cire-block-temps', cls.CIRE_BLOCK_TEMPS)
o['min-storage'] = oo.pop('min-storage', False)
o['cire-rotate'] = oo.pop('cire-rotate', False)
o['cire-maxpar'] = oo.pop('cire-maxpar', False)
Expand Down Expand Up @@ -198,6 +199,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
if options['blocklazy']:
clusters = blocking(clusters, sregistry, options)

# Unfold the `par-tile`s, if any
clusters = apply_par_tiles(clusters, **kwargs)

return clusters

@classmethod
Expand Down
12 changes: 9 additions & 3 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from devito.operator.operator import rcompile
from devito.passes import is_on_device, stream_dimensions
from devito.passes.clusters import (
Lift, blocking, buffering, cire, cse, factorize, fission, fuse, memcpy_prefetch,
optimize_pows, tasking
Lift, apply_par_tiles, blocking, buffering, cire, cse, factorize, fission, fuse,
memcpy_prefetch, optimize_pows, tasking
)
from devito.passes.equations import collect_derivatives
from devito.passes.iet import (
Expand Down Expand Up @@ -38,7 +38,9 @@

class DeviceOperatorMixin:

# Overrides the default values in the main Operator class
BLOCK_LEVELS = 0
CIRE_BLOCK_TEMPS = False

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it intended to be a default or enforced?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

enforced or things would break

MPI_MODES = (True, 'basic',)

GPU_FIT = 'all-fallback'
Expand Down Expand Up @@ -76,9 +78,10 @@ def _normalize_kwargs(cls, **kwargs):
o['skewing'] = oo.pop('skewing', False)

# CIRE
o['cire-block-temps'] = oo.pop('cire-block-temps', cls.CIRE_BLOCK_TEMPS)
o['min-storage'] = False
o['cire-rotate'] = False
o['cire-maxpar'] = oo.pop('cire-maxpar', True)
o['cire-maxpar'] = oo.pop('cire-maxpar', 'basic')
o['cire-ftemps'] = oo.pop('cire-ftemps', False)
o['cire-mingain'] = oo.pop('cire-mingain', cls.CIRE_MINGAIN)
o['cire-minmem'] = oo.pop('cire-minmem', cls.CIRE_MINMEM)
Expand Down Expand Up @@ -239,6 +242,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
if options['blocklazy']:
clusters = blocking(clusters, sregistry, options)

# Unfold the `par-tile`s, if any
clusters = apply_par_tiles(clusters, **kwargs)

return clusters

@classmethod
Expand Down
10 changes: 10 additions & 0 deletions devito/core/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ class BasicOperator(Operator):
situations where the performance impact might be detrimental.
"""

CIRE_BLOCK_TEMPS = True
"""
If an aliasing expression is computed within a blocked loop nest, all CIRE-
generated temporaries will inherit the block shape. If set to False, the
shape of the temporaries will systematically be defined by the root Dimensions.
"""

CIRE_MINGAIN = 10
"""
Minimum operation count reduction for a redundant expression to be optimized
Expand Down Expand Up @@ -240,6 +247,9 @@ def _check_kwargs(cls, **kwargs):
if oo['mpi'] and oo['mpi'] not in cls.MPI_MODES:
raise InvalidOperator(f"Unsupported MPI mode `{oo['mpi']}`")

if oo['cire-maxpar'] not in (False, 'basic', 'compact'):
raise InvalidOperator("Illegal `cire-maxpar` value")

if oo['cse-algo'] not in ('basic', 'smartsort', 'advanced'):
raise InvalidOperator("Illegal `cse-algo` value")

Expand Down
12 changes: 10 additions & 2 deletions devito/finite_differences/differentiable.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,9 @@ def value(self, idx):
class IndexDerivative(IndexSum):

__rargs__ = ('expr', 'mapper')
__rkwargs__ = IndexSum.__rkwargs__ + ('deriv_order',)

def __new__(cls, expr, mapper, **kwargs):
def __new__(cls, expr, mapper, deriv_order=None, **kwargs):
dimensions = as_tuple(set(mapper.values()))

# Detect the Weights among the arguments
Expand All @@ -1008,6 +1009,8 @@ def __new__(cls, expr, mapper, **kwargs):
obj._weights = weights
obj._mapper = frozendict(mapper)

obj._deriv_order = deriv_order

return obj

def _hashable_content(self):
Expand Down Expand Up @@ -1036,6 +1039,10 @@ def weights(self):
def mapper(self):
return self._mapper

@property
def deriv_order(self):
    """
    The `deriv_order` value supplied when the object was built, or None
    if it was not provided.
    """
    return self._deriv_order

@property
def depth(self):
iderivs = self.expr.find(IndexDerivative)
Expand Down Expand Up @@ -1212,7 +1219,8 @@ def _diff2sympy(obj):

# Handle special objects
if isinstance(obj, DiffDerivative):
return IndexDerivative(*args, obj.mapper), True
return IndexDerivative(*args, obj.mapper,
deriv_order=obj.deriv_order), True

# Handle generic objects such as arithmetic operations
try:
Expand Down
4 changes: 3 additions & 1 deletion devito/finite_differences/finite_difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@ def make_derivative(expr, dim, fd_order, deriv_order, side, matvec, x0, coeffici
with suppress(AttributeError):
expr = expr._evaluate(expand=False)

deriv = DiffDerivative(expr*weights, {dim: indices.free_dim})
deriv = DiffDerivative(
expr*weights, {dim: indices.free_dim}, deriv_order=deriv_order
)
else:
terms = []
for i, c in zip(indices, weights, strict=True):
Expand Down
37 changes: 36 additions & 1 deletion devito/ir/clusters/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from devito.mpi.halo_scheme import HaloScheme, HaloTouch
from devito.mpi.reduction_scheme import DistReduce
from devito.symbolics import estimate_cost
from devito.symbolics import estimate_cost, uxreplace
from devito.tools import as_tuple, filter_ordered, flatten, infer_dtype
from devito.types import (
CriticalRegion, Fence, Indexed, PhaseMarker, TensorMove, ThreadArrive, ThreadCommit,
Expand Down Expand Up @@ -128,6 +128,33 @@ def rebuild(self, *args, **kwargs):
syncs=kwargs.get('syncs', self.syncs),
halo_scheme=kwargs.get('halo_scheme', self.halo_scheme))

def subs(self, mapper, compact=()):
    """
    Build a new Cluster applying substitution rules to `self`.

    Parameters
    ----------
    mapper : dict
        The substitutions to apply. They are applied to the expressions,
        the iteration space, the guards, the properties and the syncs.
        If empty, `self` is returned unchanged.
    compact : iterable, optional
        Dimensions of `self.ispace` whose Block dimensions are promoted
        (see `self.ispace[d].promote`). Each such Dimension is replaced,
        in the expressions, by the Dimension resulting from the promotion.

    Raises
    ------
    NotImplementedError
        If `self` carries a non-empty halo scheme.
    """
    if not mapper:
        return self

    if self.halo_scheme:
        # NOTE(review): this can probably be relaxed to
        # `self.halo_scheme.distributed_aindices & mapper.keys()`
        raise NotImplementedError(
            "Cannot apply substitutions to a Cluster with a halo scheme"
        )

    key0 = lambda i: i.is_Block
    subs0 = {d: self.ispace[d].promote(key0).dim for d in compact}

    # The expressions see both the user-provided substitutions and the
    # ones stemming from the promotion of the `compact` Dimensions
    subs = {**mapper, **subs0}
    exprs = [uxreplace(e, subs) for e in self.exprs]

    ispace = self.ispace.switch(mapper)
    # Only promote the Block dimensions defined by the `compact` Dimensions
    key = lambda i: key0(i) and i in flatten(d._defines for d in subs0)
    ispace = ispace.promote(key, mode='total')

    guards = self.guards.subs(mapper).promote(subs0)
    properties = self.properties.subs(mapper).promote(subs0)
    syncs = self.syncs.subs(mapper)

    return self.__class__(exprs=exprs, ispace=ispace, guards=guards,
                          properties=properties, syncs=syncs)

@property
def exprs(self):
    """The expressions stored in `self._exprs`."""
    return self._exprs
Expand Down Expand Up @@ -591,6 +618,14 @@ def dspace(self):
"""Return the DataSpace of this ClusterGroup."""
return DataSpace.union(*[i.dspace.reset() for i in self])

@property
def is_dense(self):
    """True if every element of this group reports `is_dense`."""
    # NOTE(review): a reviewer asked whether this property should be cached
    return all(i.is_dense for i in self)

@property
def is_wild(self):
    """True if every element of this group reports `is_wild`."""
    return all(i.is_wild for i in self)

@property
def is_halo_touch(self):
    """True if every element of this group reports `is_halo_touch`."""
    return all(i.is_halo_touch for i in self)
Expand Down
21 changes: 12 additions & 9 deletions devito/ir/clusters/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,7 @@ def _make_key(self, cluster, level):
assert self._q_ispace_in_key
ispace = cluster.ispace[:level]

if self._q_guards_in_key:
try:
guards = tuple(cluster.guards.get(i.dim) for i in ispace)
except AttributeError:
# `cluster` is actually a ClusterGroup
assert len(cluster.guards) == 1
guards = tuple(cluster.guards[0].get(i.dim) for i in ispace)
else:
guards = None
guards = self._make_key_guards(cluster, ispace)

if self._q_properties_in_key:
properties = cluster.properties.drop(cluster.ispace[level:].itdims)
Expand All @@ -68,6 +60,17 @@ def _make_key(self, cluster, level):

return (prefix,) + subkey

def _make_key_guards(self, cluster, ispace):
if not self._q_guards_in_key:
return None

try:
return tuple(cluster.guards.get(i.dim) for i in ispace)
except AttributeError:
# `cluster` is actually a ClusterGroup
assert len(cluster.guards) == 1
return tuple(cluster.guards[0].get(i.dim) for i in ispace)

def _make_key_hook(self, cluster, level):
return ()

Expand Down
30 changes: 21 additions & 9 deletions devito/ir/support/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
)
from devito.tools import (
CacheInstances, Tag, as_mapper, as_tuple, filter_sorted, flatten, is_integer,
memoized_generator, memoized_meth, smart_gt, smart_lt
memoized_generator, memoized_meth, smart_gt, smart_lt, split
)
from devito.types import (
ComponentAccess, CriticalRegion, Dimension, DimensionTuple, Fence, Function, Symbol,
TBArray, Temp, TempArray
TBArray, Temp, TempArray, TensorMove
)

__all__ = ['ExprGeometry', 'IterationInstance', 'Scope', 'TimedAccess']
Expand Down Expand Up @@ -1383,19 +1383,31 @@ def vinf(entries):

def retrieve_accesses(exprs, **kwargs):
    """
    Similar to `retrieve_terminals`, but with some adjustments:

        * ComponentAccesses are retained, while the Indexeds they wrap are
          discarded;
        * TensorMoves are replaced by the `access` they carry, that is the
          logical Indexed they represent.

    Note that `mode='unique'` is always enforced, overriding any `mode`
    supplied by the caller through `kwargs`.
    """
    kwargs['mode'] = 'unique'

    compaccs = search(exprs, ComponentAccess)
    if compaccs:
        # Handle ComponentAccesses: temporarily replace each of them with a
        # dummy Symbol so that the wrapped Indexed is not retrieved
        # NOTE(review): it would likely be cheaper to support an
        # `ignore=compaccs` argument in `retrieve_terminals`
        subs = {i: Symbol(f'dummy{n}') for n, i in enumerate(compaccs)}
        exprs1 = uxreplace(exprs, subs)
        terms1 = retrieve_terminals(exprs1, **kwargs)

        # Drop the dummies, then add back the actual ComponentAccesses
        accesses = compaccs | (terms1 - set(subs.values()))
    else:
        accesses = retrieve_terminals(exprs, **kwargs)

    # Handle TensorMoves
    key = lambda i: isinstance(i, TensorMove)
    tmovs, other = split(accesses, key)
    accesses = {i.access for i in tmovs} | other

    return accesses


def disjoint_test(e0, e1, d, it):
Expand Down
25 changes: 25 additions & 0 deletions devito/ir/support/guards.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,31 @@ def as_map(self, d, cls):

return dict(i.args for i in search(self.get(d), cls))

def subs(self, mapper):
    """
    Build a new Guards object applying the substitutions in `mapper`.

    Both the keys and the guards are affected: a key `d` is replaced by
    `mapper[d]` if present (left unchanged otherwise), while each guard is
    rewritten via `xreplace(mapper)`.
    """
    m = {mapper.get(d, d): v.xreplace(mapper) for d, v in self.items()}

    return Guards(m)

def promote(self, subs):
    """
    Move the guards attached to the keys of `subs` onto the corresponding
    values.

    For each `d -> v` in `subs`, the guards registered for any of the
    Dimensions in `d._defines` are collected, ignoring the trivial `true`
    guard. If exactly one distinct guard is found, `d` is replaced by `v`
    within it and the result is imposed on `v`. If no guard is found, the
    pair is skipped. Finally, the keys of `subs` are dropped via `popany`.

    Raises
    ------
    NotImplementedError
        If more than one distinct guard is found for a given `d`.
    """
    m = self
    for d, v in subs.items():
        # The lookup is performed on `self`, not on the partial result `m`
        guards = {self.get(i) for i in d._defines} - {true}
        if len(guards) > 1:
            raise NotImplementedError(
                f"Cannot promote {d} to {v} due to multiple guards: {guards}"
            )
        elif len(guards) == 0:
            continue

        guard = guards.pop()
        guard = guard.xreplace({d: v})

        m = m.impose(v, guard)

    m = m.popany(subs)

    return m


class GuardExpr(LocalObject, BooleanFunction):

Expand Down
Loading
Loading