Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

check_implemented_domains fails #730

Closed
isuruf opened this issue Jan 11, 2023 · 2 comments
Closed

check_implemented_domains fails #730

isuruf opened this issue Jan 11, 2023 · 2 comments

Comments

@isuruf
Copy link
Collaborator

isuruf commented Jan 11, 2023

The following kernel fails with:

loopy.diagnostic.LoopyError: sanity check failed--implemented and desired domain for instruction 'write_result' do not match

implemented: [ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= itgt_box < ntgt_boxes and itgt_start <= itgt < itgt_end }

desired:[ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= itgt_box < ntgt_boxes and itgt_start <= itgt < itgt_end and 0 <= iknl <= 1 }

sample point in implemented but not desired: itgt=0, itgt_box=0, ntgt_boxes=1, itgt_end=1, itgt_start=0, iknl=2
gist of constraints in implemented but not desired: [ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= iknl <= 1 }

loopy kernel:

import loopy as lp
import numpy as np
from pymbolic.primitives import *
import immutables


# Reproducer kernel for the "implemented and desired domain for instruction
# 'write_result' do not match" sanity-check failure quoted at the top of this
# report. The kernel is kept exactly as reported. This statement only builds
# the kernel; the error is presumably raised by the code-generation call at
# the end of the script -- TODO confirm.
e2p_from_csr_knl = lp.make_kernel(
    # Loop domains, one set string per entry. Names listed in a leading
    # "[...] ->" are parameters of that set.
    [
    "[ntgt_boxes] -> { [itgt_box] : 0 <= itgt_box < ntgt_boxes }",
    # itgt_start / itgt_end are parameters here and are also assigned by
    # instructions insn_0 / insn_1 inside the kernel body below.
    "[itgt_end, itgt_start] -> { [itgt] : itgt_start <= itgt < itgt_end }",
    # Likewise, isrc_box_start / isrc_box_end are assigned by insn_3 / insn_4.
    "[isrc_box_end, isrc_box_start] -> { [isrc_box] : isrc_box_start <= isrc_box < isrc_box_end }",
    "{ [idim, idim_0] : 0 <= idim <= 1 and 0 <= idim_0 <= 1 }",
    "{ [icoeff_0] : 0 <= icoeff_0 <= 2 }",
    # iknl is bounded to 0..1 here. This is the constraint that shows up in
    # the "desired" domain of the reported error but is missing from the
    # "implemented" one.
    "{ [iknl, iknl_0] : 0 <= iknl <= 1 and 0 <= iknl_0 <= 1 }",
    "{ [e2p_idim] : 0 <= e2p_idim <= 1 }",
    ],
    # Kernel instructions. Each one carries an explicit id, and most carry
    # explicit dependencies. 'write_result' (last instruction inside the itgt
    # loop) is the instruction named in the error message.
    '''
    for itgt_box
    tgt_ibox = target_boxes[itgt_box] {id=insn}
    itgt_start = box_target_starts[tgt_ibox] {id=insn_0, dep=insn}
    itgt_end = itgt_start + box_target_counts_nonchild[tgt_ibox] {id=insn_1, dep=insn:insn_0}
    for itgt
    tgt[idim] = targets[idim, itgt] {id=insn_2}
    isrc_box_start = source_box_starts[itgt_box] {id=insn_3}
    isrc_box_end = source_box_starts[itgt_box + 1] {id=insn_4}
    result_temp[iknl_0] = 0 {id=init_result}
    for isrc_box
    src_ibox = source_box_lists[isrc_box] {id=insn_5}
    coeffs[icoeff_0] = src_expansions[src_ibox + (-1)*src_base_ibox, icoeff_0] {id=fetch_coeffs, dep=insn_5}
    center[idim_0] = centers[idim_0, src_ibox] {id=fetch_center, dep=insn_5}
    ... nop {id=e2p__start, dep=fetch_coeffs:insn_2:init_result:fetch_center}
    e2p_kernel_scaling = ((-1) / 2)*3.141592653589793**(-1) {id=e2p_insn, dep=e2p__start}
    e2p_b[e2p_idim] = tgt[e2p_idim] + (-1)*center[e2p_idim] {id=e2p_insn_0, dep=e2p__start}
    e2p_expr_4 = 1 / rscale {id=e2p_insn_1, dep=e2p__start}
    e2p_expr_0 = e2p_b[0]*e2p_b[0] + e2p_b[1]*e2p_b[1] {id=e2p_insn_2, dep=e2p_insn_0:e2p__start}
    e2p_expr_1 = rscale*(1 / e2p_expr_0) {id=e2p_insn_3, dep=e2p__start:e2p_insn_2}
    e2p_expr_2 = e2p_b[1]*coeffs[2] {id=e2p_insn_4, dep=e2p_insn_0:e2p__start}
    e2p_expr_3 = e2p_b[0]*e2p_expr_1 {id=e2p_insn_5, dep=e2p_insn_0:e2p_insn_3:e2p__start}
    e2p_temp_2 = e2p_b[0]*e2p_expr_4 {id=e2p_insn_6, dep=e2p_insn_0:e2p_insn_1:e2p__start}
    e2p_temp_0 = e2p_b[1]*e2p_expr_4 {id=e2p_insn_7, dep=e2p_insn_0:e2p_insn_1:e2p__start}
    e2p_cse_exprvar = e2p_temp_0*e2p_temp_0 + e2p_temp_2*e2p_temp_2 {id=e2p_insn_8, dep=e2p_insn_6:e2p__start:e2p_insn_7}
    e2p_cse_exprvar_0 = sqrt(e2p_cse_exprvar) {id=e2p_insn_9, dep=e2p_insn_8:e2p__start}
    e2p_temp_1 = e2p_cse_exprvar_0 {id=e2p_insn_10, dep=e2p_insn_9:e2p__start}
    e2p_cse_exprvar_1 = sqrt(e2p_expr_0) {id=e2p_insn_11, dep=e2p__start:e2p_insn_2}
    result_temp[0] = result_temp[0] + e2p_kernel_scaling*(coeffs[0]*log(e2p_cse_exprvar_1) + e2p_expr_3*coeffs[1] + e2p_expr_2*e2p_expr_1) {id=e2p_result_0, dep=e2p_insn:e2p_insn_4:e2p_insn_5:e2p_insn_11:e2p_insn_3:e2p__start}
    e2p_temp_5 = e2p_expr_4 {id=e2p_insn_12, dep=e2p_insn_1:e2p__start}
    e2p_cse_exprvar_2 = 1 / e2p_temp_1 {id=e2p_insn_13, dep=e2p__start:e2p_insn_10}
    e2p_temp_6 = (1 + (-2)*e2p_temp_2*e2p_expr_3)*e2p_cse_exprvar_2*e2p_cse_exprvar_2 {id=e2p_insn_14, dep=e2p__start:e2p_insn_5:e2p_insn_13:e2p_insn_6}
    e2p_cse_exprvar_3 = 1 / e2p_expr_0 {id=e2p_insn_15, dep=e2p__start:e2p_insn_2}
    result_temp[1] = result_temp[1] + e2p_kernel_scaling*(e2p_expr_3*e2p_temp_5*coeffs[0] + e2p_temp_5*e2p_temp_6*coeffs[1] + (-2)*e2p_b[0]*e2p_expr_2*e2p_temp_5*rscale*rscale*e2p_cse_exprvar_3*e2p_cse_exprvar_3) {id=e2p_result_1, dep=e2p_insn_0:e2p_insn:e2p_insn_4:e2p_insn_5:e2p_insn_14:e2p_insn_15:e2p__start:e2p_insn_12}
    ... nop {id=update_result, dep=e2p_result_1:e2p_insn_7:e2p_insn_2:e2p_insn_5:e2p_insn_15:e2p_insn_12:e2p_insn_4:e2p_insn_11:e2p_insn_3:e2p_insn_9:e2p_result_0:e2p_insn_1:e2p_insn_6:e2p_insn:e2p_insn_8:e2p_insn_14:e2p_insn_10:e2p_insn_13:e2p_insn_0}
    end
    result[iknl, itgt] = result_temp[iknl] {id=write_result, dep=update_result:init_result}
    end
    end
    ''', [
        # Kernel data: global array arguments and scalar value arguments.
        # Arrays declared with shape=None have no extent given here.
        lp.GlobalArg(
            name="targets", dtype=np.float64,
            shape=(2, Variable('ntargets')), for_atomic=False),
        lp.GlobalArg(
            name="box_target_starts", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="box_target_counts_nonchild", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="centers", dtype=np.float64,
            shape=(2, Variable('aligned_nboxes')), for_atomic=False),
        lp.GlobalArg(
            name="src_expansions", dtype=np.float64,
            shape=(Variable('nsrc_level_boxes'), 3), for_atomic=False),
        lp.ValueArg(
            name="src_base_ibox",
            dtype=np.int32),
        lp.ValueArg(
            name="nsrc_level_boxes",
            dtype=np.int32),
        lp.ValueArg(
            name="aligned_nboxes",
            dtype=np.int32),
        lp.ValueArg(
            name="ntargets",
            dtype=np.int32),
        # Output array, written only by the 'write_result' instruction.
        lp.GlobalArg(
            name="result", dtype=np.float64,
            shape=(2, Variable('ntargets')), for_atomic=False),
        lp.GlobalArg(
            name="source_box_starts", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="source_box_lists", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.ValueArg(
            name="ntgt_boxes",
            dtype=np.int32),
        lp.ValueArg(
            name="rscale",
            dtype=np.float64),
        lp.GlobalArg(
            name="target_boxes", dtype=np.uint32,
            shape=(Variable('ntgt_boxes'),), for_atomic=False),
        # Temporaries assigned by the instructions above. Every one is
        # declared with address_space=lp.auto and read_only=False.
        lp.TemporaryVariable(
            name="tgt_ibox",
            dtype=np.uint32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="itgt_start",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="itgt_end",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="tgt",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc_box_start",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc_box_end",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        # Two-entry accumulator: zeroed by 'init_result', updated by
        # 'e2p_result_0' / 'e2p_result_1', read back by 'write_result'.
        lp.TemporaryVariable(
            name="result_temp",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="src_ibox",
            dtype=np.uint32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="coeffs",
            dtype=np.float64,
            shape=(3,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="center",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar_0",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar_1",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar_2",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar_3",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_kernel_scaling",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_b",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_4",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_0",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_1",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_2",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_3",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_2",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_0",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_1",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_5",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_6",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        ],
        # Language version and kernel name passed through to loopy unchanged.
        lang_version=(2018, 2),
        name="e2p_from_csr",
        )

# Apply the iname tags one at a time, in the same order as the original
# report: the constant-bound inames get the "unr" tag and itgt_box gets
# the "g.0" tag.
for iname_tag in (
        "e2p_idim:unr",
        "iknl:unr",
        "idim:unr",
        "iknl_0:unr",
        "idim_0:unr",
        "itgt_box:g.0",
        ):
    e2p_from_csr_knl = lp.tag_inames(e2p_from_csr_knl, iname_tag)

# Merge the single kernel into a translation unit, then ask for the device
# code; this is the last step of the reproducer.
t_unit = lp.merge([e2p_from_csr_knl])
codegen_result = lp.generate_code_v2(t_unit)
codegen_result.device_code()
@inducer
Copy link
Owner

inducer commented May 10, 2023

Turns out this is inducer/islpy#103 in disguise. See #768.

@isuruf
Copy link
Collaborator Author

isuruf commented May 10, 2023

Thanks, @inducer, for tracking this down to inducer/islpy#103 and for the PR.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants