From ff50d6f9830e9190d3813a554da1b5374f1d9e59 Mon Sep 17 00:00:00 2001 From: Greta Yorsh <45005955+gretay-js@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:41:32 +0000 Subject: [PATCH 1/4] Propagate alignment of memory accesses to simd_selection Currently it's always 8 but having this argument will help us consider alignment for new vector sequences. --- backend/amd64/simd_selection.ml | 19 +++++++++++++++---- backend/arm64/simd_selection.ml | 3 ++- backend/cfg/vectorize.ml | 14 +++++++++++++- backend/vectorize_utils.ml | 6 ++++++ backend/vectorize_utils.mli | 4 ++++ 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/backend/amd64/simd_selection.ml b/backend/amd64/simd_selection.ml index 526d37d3f11..306c2ff4082 100644 --- a/backend/amd64/simd_selection.ml +++ b/backend/amd64/simd_selection.ml @@ -474,12 +474,24 @@ let vector_width_in_bits = 128 (* CR-soon gyorsh: [vectorize_operation] is too long, refactor / split up. *) let vectorize_operation (width_type : Vectorize_utils.Width_in_bits.t) - ~arg_count ~res_count (cfg_ops : Operation.t list) : + ~arg_count ~res_count ~alignment_in_bytes (cfg_ops : Operation.t list) : Vectorize_utils.Vectorized_instruction.t list option = (* Assumes cfg_ops are isomorphic *) let width_in_bits = Vectorize_utils.Width_in_bits.to_int width_type in let length = List.length cfg_ops in assert (length * width_in_bits = vector_width_in_bits); + let vector_width_in_bytes = vector_width_in_bits / 8 in + let is_aligned_to_vector_width () = + match alignment_in_bytes with + | None -> Misc.fatal_error "Unexpected memory operation" + | Some alignment_in_bytes -> + Int.compare alignment_in_bytes vector_width_in_bytes >= 0 + in + let vec128_chunk () : Cmm.memory_chunk = + if is_aligned_to_vector_width () + then Onetwentyeight_aligned + else Onetwentyeight_unaligned + in let same_width memory_chunk = Vectorize_utils.Width_in_bits.equal width_type (Vectorize_utils.Width_in_bits.of_memory_chunk memory_chunk) @@ -650,7 +662,7 @@ let 
vectorize_operation (width_type : Vectorize_utils.Width_in_bits.t) assert (arg_count = num_args_addressing && res_count = 1); let operation = Operation.Load - { memory_chunk = Onetwentyeight_unaligned; + { memory_chunk = vec128_chunk (); addressing_mode; mutability; is_atomic @@ -670,8 +682,7 @@ let vectorize_operation (width_type : Vectorize_utils.Width_in_bits.t) let num_args_addressing = Arch.num_args_addressing addressing_mode in assert (arg_count = num_args_addressing + 1 && res_count = 0); let operation = - Operation.Store - (Onetwentyeight_unaligned, addressing_mode, is_assignment) + Operation.Store (vec128_chunk (), addressing_mode, is_assignment) in Some [ { operation; diff --git a/backend/arm64/simd_selection.ml b/backend/arm64/simd_selection.ml index 3e18e247129..87a2a7eff3d 100644 --- a/backend/arm64/simd_selection.ml +++ b/backend/arm64/simd_selection.ml @@ -24,6 +24,7 @@ let pseudoregs_for_operation _ arg res = arg, res let vector_width_in_bits = 128 -let vectorize_operation _ ~arg_count:_ ~res_count:_ (_ : Operation.t list) : +let vectorize_operation _ ~arg_count:_ ~res_count:_ ~alignment_in_bytes:_ + (_ : Operation.t list) : Vectorize_utils.Vectorized_instruction.t list option = None diff --git a/backend/cfg/vectorize.ml b/backend/cfg/vectorize.ml index 77703302f84..6b79d8ec7f8 100644 --- a/backend/cfg/vectorize.ml +++ b/backend/cfg/vectorize.ml @@ -638,6 +638,8 @@ module Dependencies : sig type t val first_memory_arg_index : t -> int + + val alignment_in_bytes : t -> int end end @@ -821,6 +823,8 @@ end = struct type t val first_memory_arg_index : t -> int + + val alignment_in_bytes : t -> int end module Dependencies : sig @@ -918,6 +922,8 @@ end = struct val first_memory_arg_index : t -> int + val alignment_in_bytes : t -> int + val get_instruction_id : t -> Instruction.Id.t (** [is_adjacent t1 t2] assumes that [t1] and [t2] have isomorphic operations, @@ -956,6 +962,9 @@ end = struct let first_memory_arg_index t = 
Memory_access.first_memory_arg_index t.memory_access + let alignment_in_bytes t = + Vectorize_utils.Memory_access.alignment_in_bytes t.memory_access + let get_instruction_id t = Instruction.id t.instruction let memory_access (instruction : Instruction.t) : Memory_access.t option = @@ -2134,12 +2143,15 @@ end = struct && can_vectorize_memory_accesses mem_op instructions deps) then None else + let alignment_in_bytes = + Option.map Dependencies.Memory.Operation.alignment_in_bytes mem_op + in let cfg_ops = List.map (fun i -> i |> Instruction.op |> Option.get) instructions in let vector_instructions = Simd_selection.vectorize_operation width_in_bits ~arg_count - ~res_count cfg_ops + ~res_count ~alignment_in_bytes cfg_ops in match vector_instructions with | None -> None diff --git a/backend/vectorize_utils.ml b/backend/vectorize_utils.ml index f119306bbe8..82cf933595d 100644 --- a/backend/vectorize_utils.ml +++ b/backend/vectorize_utils.ml @@ -72,6 +72,12 @@ module Memory_access = struct let desc t = t.desc let first_memory_arg_index t = t.first_memory_arg_index + + let alignment_in_bytes t = + (* CR-someday gyorsh: propagate alignment of base address (such as + bigarray). Can be used to emit more efficient vector sequences, for + example, arithmetic operations with memory arguments (not stack). *) + Arch.size_int end module Vectorized_instruction = struct diff --git a/backend/vectorize_utils.mli b/backend/vectorize_utils.mli index 43e6961f35a..7487a69978d 100644 --- a/backend/vectorize_utils.mli +++ b/backend/vectorize_utils.mli @@ -53,6 +53,10 @@ module Memory_access : sig val desc : t -> desc val first_memory_arg_index : t -> int + + (** Base address of memory access [t] is guaranteed to be aligned to + at least [alignment_in_bytes t]. 
*) + val alignment_in_bytes : t -> int end module Vectorized_instruction : sig From 9e238e25ec7d57fadf777bf15492264a5c51a1a0 Mon Sep 17 00:00:00 2001 From: Greta Yorsh <45005955+gretay-js@users.noreply.github.com> Date: Tue, 14 Jan 2025 13:16:24 +0000 Subject: [PATCH 2/4] Enable warnings on the new files --- backend/amd64/vectorize_specific.ml | 2 ++ backend/arm64/vectorize_specific.ml | 2 ++ backend/vectorize_utils.ml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/backend/amd64/vectorize_specific.ml b/backend/amd64/vectorize_specific.ml index 2f535ca2834..e6bcf4cfd58 100644 --- a/backend/amd64/vectorize_specific.ml +++ b/backend/amd64/vectorize_specific.ml @@ -1,3 +1,5 @@ +[@@@ocaml.warning "+a-40-42"] + (* Keep in sync with [Arch.operation_is_pure], [Arch.operation_can_raise], [Arch.operation_allocates]. *) module Memory_access = Vectorize_utils.Memory_access diff --git a/backend/arm64/vectorize_specific.ml b/backend/arm64/vectorize_specific.ml index 5eb1ff3886e..550f505c778 100644 --- a/backend/arm64/vectorize_specific.ml +++ b/backend/arm64/vectorize_specific.ml @@ -1,3 +1,5 @@ +[@@@ocaml.warning "+a-40-42"] + (* Keep in sync with [Arch.operation_is_pure], [Arch.operation_can_raise], [Arch.operation_allocates]. 
*) module Memory_access = Vectorize_utils.Memory_access diff --git a/backend/vectorize_utils.ml b/backend/vectorize_utils.ml index 82cf933595d..848b2e91148 100644 --- a/backend/vectorize_utils.ml +++ b/backend/vectorize_utils.ml @@ -1,3 +1,5 @@ +[@@@ocaml.warning "+a-40-42"] + open Arch module Width_in_bits = struct From ce9d4f28a496b67d383216e30a7d11c0205bdde8 Mon Sep 17 00:00:00 2001 From: Greta Yorsh <45005955+gretay-js@users.noreply.github.com> Date: Tue, 14 Jan 2025 13:17:11 +0000 Subject: [PATCH 3/4] Fix warnings --- backend/vectorize_utils.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/vectorize_utils.ml b/backend/vectorize_utils.ml index 848b2e91148..87a79c49011 100644 --- a/backend/vectorize_utils.ml +++ b/backend/vectorize_utils.ml @@ -75,7 +75,7 @@ module Memory_access = struct let first_memory_arg_index t = t.first_memory_arg_index - let alignment_in_bytes t = + let alignment_in_bytes _t = (* CR-someday gyorsh: propagate alignment of base address (such as bigarray). Can be used to emit more efficient vector sequences, for example, arithmetic operations with memory arguments (not stack). 
*)
From ac2955bd9b594735a1bf11a927fa5d7cd75c75de Mon Sep 17 00:00:00 2001
From: Greta Yorsh <45005955+gretay-js@users.noreply.github.com>
Date: Tue, 14 Jan 2025 13:26:34 +0000
Subject: [PATCH 4/4] Improve alignment check to be more robust

---
 backend/amd64/simd_selection.ml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/amd64/simd_selection.ml b/backend/amd64/simd_selection.ml
index 306c2ff4082..ed10039d1c3 100644
--- a/backend/amd64/simd_selection.ml
+++ b/backend/amd64/simd_selection.ml
@@ -485,7 +485,10 @@ let vectorize_operation (width_type : Vectorize_utils.Width_in_bits.t)
     match alignment_in_bytes with
     | None -> Misc.fatal_error "Unexpected memory operation"
     | Some alignment_in_bytes ->
-      Int.compare alignment_in_bytes vector_width_in_bytes >= 0
+      (* Aligned iff the guaranteed alignment is a positive multiple of the
+         vector width; exactly one vector width already qualifies. *)
+      alignment_in_bytes >= vector_width_in_bytes
+      && alignment_in_bytes mod vector_width_in_bytes = 0
   in
   let vec128_chunk () : Cmm.memory_chunk =
     if is_aligned_to_vector_width ()