We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
The following kernel fails with:
loopy.diagnostic.LoopyError: sanity check failed--implemented and desired domain for instruction 'write_result' do not match implemented: [ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= itgt_box < ntgt_boxes and itgt_start <= itgt < itgt_end } desired:[ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= itgt_box < ntgt_boxes and itgt_start <= itgt < itgt_end and 0 <= iknl <= 1 } sample point in implemented but not desired: itgt=0, itgt_box=0, ntgt_boxes=1, itgt_end=1, itgt_start=0, iknl=2 gist of constraints in implemented but not desired: [ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= iknl <= 1 }
loopy kernel:
# Reproducer: generating device code for the "e2p_from_csr" kernel below.
# The kernel is built at module level and code generation is triggered on the
# final line, so simply running this script exercises the failure.

import loopy as lp
import numpy as np
from pymbolic.primitives import Variable
import immutables


def _temp(name, dtype, shape=()):
    """Declare a writable, non-atomic temporary with automatic address space.

    Every temporary in this kernel uses the same ``for_atomic``,
    ``address_space`` and ``read_only`` settings; only *name*, *dtype* and
    *shape* vary.
    """
    return lp.TemporaryVariable(
        name=name,
        dtype=dtype,
        shape=shape,
        for_atomic=False,
        address_space=lp.auto,
        read_only=False,
    )


e2p_from_csr_knl = lp.make_kernel(
    # Loop domains (isl set syntax).
    [
        "[ntgt_boxes] -> { [itgt_box] : 0 <= itgt_box < ntgt_boxes }",
        "[itgt_end, itgt_start] -> { [itgt] : itgt_start <= itgt < itgt_end }",
        "[isrc_box_end, isrc_box_start] -> { [isrc_box] : isrc_box_start <= isrc_box < isrc_box_end }",
        "{ [idim, idim_0] : 0 <= idim <= 1 and 0 <= idim_0 <= 1 }",
        "{ [icoeff_0] : 0 <= icoeff_0 <= 2 }",
        "{ [iknl, iknl_0] : 0 <= iknl <= 1 and 0 <= iknl_0 <= 1 }",
        "{ [e2p_idim] : 0 <= e2p_idim <= 1 }",
    ],
    # Instructions, one per line. The two "... nop" instructions carry only an
    # id and dependencies (e2p__start / update_result).
    """
    for itgt_box
        tgt_ibox = target_boxes[itgt_box] {id=insn}
        itgt_start = box_target_starts[tgt_ibox] {id=insn_0, dep=insn}
        itgt_end = itgt_start + box_target_counts_nonchild[tgt_ibox] {id=insn_1, dep=insn:insn_0}
        for itgt
            tgt[idim] = targets[idim, itgt] {id=insn_2}
            isrc_box_start = source_box_starts[itgt_box] {id=insn_3}
            isrc_box_end = source_box_starts[itgt_box + 1] {id=insn_4}
            result_temp[iknl_0] = 0 {id=init_result}
            for isrc_box
                src_ibox = source_box_lists[isrc_box] {id=insn_5}
                coeffs[icoeff_0] = src_expansions[src_ibox + (-1)*src_base_ibox, icoeff_0] {id=fetch_coeffs, dep=insn_5}
                center[idim_0] = centers[idim_0, src_ibox] {id=fetch_center, dep=insn_5}
                ... nop {id=e2p__start, dep=fetch_coeffs:insn_2:init_result:fetch_center}
                e2p_kernel_scaling = ((-1) / 2)*3.141592653589793**(-1) {id=e2p_insn, dep=e2p__start}
                e2p_b[e2p_idim] = tgt[e2p_idim] + (-1)*center[e2p_idim] {id=e2p_insn_0, dep=e2p__start}
                e2p_expr_4 = 1 / rscale {id=e2p_insn_1, dep=e2p__start}
                e2p_expr_0 = e2p_b[0]*e2p_b[0] + e2p_b[1]*e2p_b[1] {id=e2p_insn_2, dep=e2p_insn_0:e2p__start}
                e2p_expr_1 = rscale*(1 / e2p_expr_0) {id=e2p_insn_3, dep=e2p__start:e2p_insn_2}
                e2p_expr_2 = e2p_b[1]*coeffs[2] {id=e2p_insn_4, dep=e2p_insn_0:e2p__start}
                e2p_expr_3 = e2p_b[0]*e2p_expr_1 {id=e2p_insn_5, dep=e2p_insn_0:e2p_insn_3:e2p__start}
                e2p_temp_2 = e2p_b[0]*e2p_expr_4 {id=e2p_insn_6, dep=e2p_insn_0:e2p_insn_1:e2p__start}
                e2p_temp_0 = e2p_b[1]*e2p_expr_4 {id=e2p_insn_7, dep=e2p_insn_0:e2p_insn_1:e2p__start}
                e2p_cse_exprvar = e2p_temp_0*e2p_temp_0 + e2p_temp_2*e2p_temp_2 {id=e2p_insn_8, dep=e2p_insn_6:e2p__start:e2p_insn_7}
                e2p_cse_exprvar_0 = sqrt(e2p_cse_exprvar) {id=e2p_insn_9, dep=e2p_insn_8:e2p__start}
                e2p_temp_1 = e2p_cse_exprvar_0 {id=e2p_insn_10, dep=e2p_insn_9:e2p__start}
                e2p_cse_exprvar_1 = sqrt(e2p_expr_0) {id=e2p_insn_11, dep=e2p__start:e2p_insn_2}
                result_temp[0] = result_temp[0] + e2p_kernel_scaling*(coeffs[0]*log(e2p_cse_exprvar_1) + e2p_expr_3*coeffs[1] + e2p_expr_2*e2p_expr_1) {id=e2p_result_0, dep=e2p_insn:e2p_insn_4:e2p_insn_5:e2p_insn_11:e2p_insn_3:e2p__start}
                e2p_temp_5 = e2p_expr_4 {id=e2p_insn_12, dep=e2p_insn_1:e2p__start}
                e2p_cse_exprvar_2 = 1 / e2p_temp_1 {id=e2p_insn_13, dep=e2p__start:e2p_insn_10}
                e2p_temp_6 = (1 + (-2)*e2p_temp_2*e2p_expr_3)*e2p_cse_exprvar_2*e2p_cse_exprvar_2 {id=e2p_insn_14, dep=e2p__start:e2p_insn_5:e2p_insn_13:e2p_insn_6}
                e2p_cse_exprvar_3 = 1 / e2p_expr_0 {id=e2p_insn_15, dep=e2p__start:e2p_insn_2}
                result_temp[1] = result_temp[1] + e2p_kernel_scaling*(e2p_expr_3*e2p_temp_5*coeffs[0] + e2p_temp_5*e2p_temp_6*coeffs[1] + (-2)*e2p_b[0]*e2p_expr_2*e2p_temp_5*rscale*rscale*e2p_cse_exprvar_3*e2p_cse_exprvar_3) {id=e2p_result_1, dep=e2p_insn_0:e2p_insn:e2p_insn_4:e2p_insn_5:e2p_insn_14:e2p_insn_15:e2p__start:e2p_insn_12}
                ... nop {id=update_result, dep=e2p_result_1:e2p_insn_7:e2p_insn_2:e2p_insn_5:e2p_insn_15:e2p_insn_12:e2p_insn_4:e2p_insn_11:e2p_insn_3:e2p_insn_9:e2p_result_0:e2p_insn_1:e2p_insn_6:e2p_insn:e2p_insn_8:e2p_insn_14:e2p_insn_10:e2p_insn_13:e2p_insn_0}
            end
            result[iknl, itgt] = result_temp[iknl] {id=write_result, dep=update_result:init_result}
        end
    end
    """,
    # Kernel arguments followed by temporaries; declaration order is kept
    # exactly as in the original report.
    [
        lp.GlobalArg(
            name="targets", dtype=np.float64,
            shape=(2, Variable("ntargets")), for_atomic=False),
        lp.GlobalArg(
            name="box_target_starts", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="box_target_counts_nonchild", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="centers", dtype=np.float64,
            shape=(2, Variable("aligned_nboxes")), for_atomic=False),
        lp.GlobalArg(
            name="src_expansions", dtype=np.float64,
            shape=(Variable("nsrc_level_boxes"), 3), for_atomic=False),
        lp.ValueArg(name="src_base_ibox", dtype=np.int32),
        lp.ValueArg(name="nsrc_level_boxes", dtype=np.int32),
        lp.ValueArg(name="aligned_nboxes", dtype=np.int32),
        lp.ValueArg(name="ntargets", dtype=np.int32),
        lp.GlobalArg(
            name="result", dtype=np.float64,
            shape=(2, Variable("ntargets")), for_atomic=False),
        lp.GlobalArg(
            name="source_box_starts", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="source_box_lists", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.ValueArg(name="ntgt_boxes", dtype=np.int32),
        lp.ValueArg(name="rscale", dtype=np.float64),
        lp.GlobalArg(
            name="target_boxes", dtype=np.uint32,
            shape=(Variable("ntgt_boxes"),), for_atomic=False),
        _temp("tgt_ibox", np.uint32),
        _temp("itgt_start", np.int32),
        _temp("itgt_end", np.int32),
        _temp("tgt", np.float64, (2,)),
        _temp("isrc_box_start", np.int32),
        _temp("isrc_box_end", np.int32),
        _temp("result_temp", np.float64, (2,)),
        _temp("src_ibox", np.uint32),
        _temp("coeffs", np.float64, (3,)),
        _temp("center", np.float64, (2,)),
        _temp("e2p_cse_exprvar", np.float64),
        _temp("e2p_cse_exprvar_0", np.float64),
        _temp("e2p_cse_exprvar_1", np.float64),
        _temp("e2p_cse_exprvar_2", np.float64),
        _temp("e2p_cse_exprvar_3", np.float64),
        _temp("e2p_kernel_scaling", np.float64),
        _temp("e2p_b", np.float64, (2,)),
        _temp("e2p_expr_4", np.float64),
        _temp("e2p_expr_0", np.float64),
        _temp("e2p_expr_1", np.float64),
        _temp("e2p_expr_2", np.float64),
        _temp("e2p_expr_3", np.float64),
        _temp("e2p_temp_2", np.float64),
        _temp("e2p_temp_0", np.float64),
        _temp("e2p_temp_1", np.float64),
        _temp("e2p_temp_5", np.float64),
        _temp("e2p_temp_6", np.float64),
    ],
    lang_version=(2018, 2),
    name="e2p_from_csr",
)

# Apply the iname tags one at a time, in the same order as the original report.
for _iname_tag in (
        "e2p_idim:unr",
        "iknl:unr",
        "idim:unr",
        "iknl_0:unr",
        "idim_0:unr",
        "itgt_box:g.0",
):
    e2p_from_csr_knl = lp.tag_inames(e2p_from_csr_knl, _iname_tag)

t_unit = lp.merge([e2p_from_csr_knl])

# Code generation is the step being reproduced; the result is discarded.
lp.generate_code_v2(t_unit).device_code()
The text was updated successfully, but these errors were encountered:
Hack around nameless isl Set equality in implemented-domain caching (c…
c3e3154
…loses gh-730)
Turns out this is inducer/islpy#103 in disguise. See #768.
Sorry, something went wrong.
Thanks @inducer for tracking this down to inducer/islpy#103 and for the PR
1175657
No branches or pull requests
Following kernel fails with
loopy kernel:
The text was updated successfully, but these errors were encountered: