Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit 34a5e6a
Author: Jacob Van Buren <[email protected]>
Date:   Wed Jan 22 15:26:17 2025 -0500

    Squashed commit of the following:

    commit 16f1b23
    Author: Jacob Van Buren <[email protected]>
    Date:   Wed Jan 22 15:25:23 2025 -0500

        Squashed commit of the following:

        commit 7c5c7f0
        Author: Jacob Van Buren <[email protected]>
        Date:   Wed Jan 22 15:24:10 2025 -0500

            Squashed commit of the following:

            commit a53391d
            Author: Max Slater <[email protected]>
            Date:   Wed Jan 22 13:53:02 2025 -0500

                Rename `atomic_cas`/`Compare_and_swap` (#3491)

            commit 92b327c
            Author: Max Slater <[email protected]>
            Date:   Tue Jan 21 14:18:56 2025 -0500

                Additional operations for int atomics (#3490)

            commit 38e792c
            Author: Luke Maurer <[email protected]>
            Date:   Tue Jan 21 17:09:23 2025 +0000

                Support `-open Foo` where `Foo` is parameterised (#3489)

                The command line

                ```
                ocamlopt -open Foo -parameter P -c bar.ml
                ```

                should be fine, even if `Foo` is itself parameterised by `P`: as usual, we
                compile `bar.ml` as if it began with `open! Foo`, and by the subset rule, `Bar`
                can refer to `Foo` because it takes at least the same parameters. Unfortunately,
                currently we process `-open` before `-parameter`, so when we go to check the
                implicit reference to `Foo`, we think there are no parameters, and we report an
                error. (Confusingly, the error suggests that the user add `-parameter P` to the
                command line.)

                The fix is simple: move the code that processes `-parameter` earlier so that
                the initial environment is constructed with the parameters already available.

            commit 784dc96
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 21 14:07:41 2025 +0000

                Rename [emit.mlp] to [emit.ml] on amd64 (#3488)

            commit f3b720a
            Author: Zesen Qian <[email protected]>
            Date:   Tue Jan 21 10:35:35 2025 +0000

                Module aliases save locks instead of walking them immediately (#3398)

            commit 389a7c3
            Author: Nick Barnes <[email protected]>
            Date:   Mon Jan 20 17:55:51 2025 +0000

                Add OCAMLRUNPARAM d= parameter for max # domains (#3487)

                Allow maximum number of domains to be specified as a OCAMLRUNPARAM parameter.

                (cherry picked from commit f92715f1044aea30ca97e497653c883578f91fe6)

                Co-authored-by: KC Sivaramakrishnan <[email protected]>

            commit 63767d7
            Author: Nick Barnes <[email protected]>
            Date:   Mon Jan 20 17:45:30 2025 +0000

                Add caml_runtime_parameters back (#3468)

                Add caml_runtime_parameters back.

            commit 5e9975e
            Author: Greta Yorsh <[email protected]>
            Date:   Mon Jan 20 17:15:15 2025 +0000

                Emit atomic compare and exchange (#3486)

            commit a9821e8
            Author: Basile Clément <[email protected]>
            Date:   Mon Jan 20 15:36:09 2025 +0100

                Make patricia trees big-endian (#3438)

                This patch switches up the implementation of the `Patricia_tree` module
                from little-endian to big-endian, with the main motivation to be able to
                implement in-order traversal.

                The `caml_int_clz_tagged_to_untagged` and `caml_int_tagged_to_tagged` C
                stubs are recognized and replaced with the `clz` instruction when
                compiling with flambda2, so they are only used in the boot compiler.

            commit b8a9789
            Author: Leo White <[email protected]>
            Date:   Fri Jan 17 13:35:19 2025 +0000

                Generate specific instructions for atomics on immediates (#3477)

                * Generate specific instructions for atomics on immediates

                * Fix formatting

            commit 7b93134
            Author: Greta Yorsh <[email protected]>
            Date:   Fri Jan 17 09:30:30 2025 +0000

                Vectorizer: add tests (#3456)

                * Add tests

                * Disable ocamlformat on unboxed tests

                * Increase -vectorize-max-block-size for tests

                * Fix assertion failure when vectorizing unboxed int32

                * Disable float32 on arm64 (not yet implemented)

                * improve gen_dune.ml for the vectorizer tests

                Co-authored-by: Xavier Clerc <[email protected]>

            commit 6379678
            Author: Mark Shinwell <[email protected]>
            Date:   Thu Jan 16 16:08:29 2025 +0000

                Add "-ocamlrunparam" linker flag (#3483)

            commit f7b2cbe
            Author: Xavier Clerc <[email protected]>
            Date:   Thu Jan 16 15:25:28 2025 +0000

                Bump the version of `actions/upload-artifact` (#3474)

                * Bump the version of actions/upload-artifact.

                * Ensure artifact names are unique.

                * To trigger CI.

                * Try with commit hash.

            commit afb8a55
            Author: Mark Shinwell <[email protected]>
            Date:   Thu Jan 16 14:03:37 2025 +0000

                Move two macOS CI controllers to runtime5 (#3482)

            commit aae5c40
            Author: Mark Shinwell <[email protected]>
            Date:   Thu Jan 16 13:50:34 2025 +0000

                Fix error in caml_get_init_stack_wsize (#3481)

            commit 525868c
            Author: dkalinichenko-js <[email protected]>
            Date:   Wed Jan 15 17:47:30 2025 -0500

                Use null pointers for `or_null`  (#3267)

                * runtime changes

                * runtime4 changes

                * Change `CODE_UNBOXED_INT64` and `CODE_NULL`

                * make `Is_block` an inline function

                * redefine `Is_long`

                * fix

                * Change `CODE_UNBOXED_INT64` back

                * optimize `Is_block`/`Is_long`

                * `null_tag` for `caml_obj_tag`

                * consistent naming

                * slightly more reassuring comment

                * `inline` is unnecessary and might break `#define inline`

                * optimization incorrect in presence of nulls

                * Constructors and pattern-matching

                * Bytecode compilation

                * `or_null` is `Variant_or_null`

                * Accept tests

                * Runtime tests

                * Delete obsolete or_null test

                ---------

                Co-authored-by: Diana Kalinichenko <[email protected]>

            commit 9796b21
            Author: dkalinichenko-js <[email protected]>
            Date:   Wed Jan 15 17:47:13 2025 -0500

                Runtime changes for `or_null` (#3265)

                * runtime changes

                * runtime4 changes

                * Change `CODE_UNBOXED_INT64` and `CODE_NULL`

                * make `Is_block` an inline function

                * redefine `Is_long`

                * fix

                * Change `CODE_UNBOXED_INT64` back

                * optimize `Is_block`/`Is_long`

                * `null_tag` for `caml_obj_tag`

                * consistent naming

                * slightly more reassuring comment

                * `inline` is unnecessary and might break `#define inline`

                * optimization incorrect in presence of nulls

                ---------

                Co-authored-by: Diana Kalinichenko <[email protected]>

            commit df4a6e0
            Author: Chris Casinghino <[email protected]>
            Date:   Wed Jan 15 13:08:53 2025 -0500

                Bump magic numbers for 5.2.0minus-5 (#3478)

            commit d1c8d85
            Author: Mark Shinwell <[email protected]>
            Date:   Wed Jan 15 16:44:39 2025 +0000

                Peek and poke (#3309)

            commit f8caad4
            Author: Greta Yorsh <[email protected]>
            Date:   Wed Jan 15 16:00:39 2025 +0000

                Vectorizer: xmm register can hold ocaml values (#3455)

                * Add [Valx2] to [Cmm.machtype_component]

                * Vectorizer generates [Valx2]

                * Record live offsets of [Valx2] in the frametable

                For runtime4, xmm registers are below [gc_regs], so use negative offsets.

                * Move [types_are_compatible] from [Reg] to [Proc]

                This information has to be in sync with register classes, stack
                slot classes, and emit for move instructions.

            commit 34a7873
            Author: Zesen Qian <[email protected]>
            Date:   Wed Jan 15 13:13:18 2025 +0000

                Improve coherence of modality zapping (#3462)

            commit 1a6a9d3
            Author: Mark Shinwell <[email protected]>
            Date:   Wed Jan 15 12:06:24 2025 +0000

                Fix caml_obj_with_tag (#3465)

            commit bc5110a
            Author: Nick Barnes <[email protected]>
            Date:   Wed Jan 15 11:52:25 2025 +0000

                Reset the pacing of major collection after any synchronous major GC (#3463)

                Reset the pacing of major collection at the end of any synchronous major collection.

            commit 9faf700
            Author: Mark Shinwell <[email protected]>
            Date:   Wed Jan 15 11:32:59 2025 +0000

                Remove remnants of caml_obj_truncate (#3469)

            commit ff9430b
            Author: Luke Maurer <[email protected]>
            Date:   Wed Jan 15 11:31:22 2025 +0000

                Mangle instance symbol names using `____` rather than `___` (#3472)

                Apparently there are libraries around that have names ending in single
                underscores, leading to ambiguous symbol names if we use triple
                underscores to delimit instances. Other choices are possible but this PR
                opts for newly-developed quadruple-underscore technology.

            commit 9984700
            Author: Vincent Laviron <[email protected]>
            Date:   Wed Jan 15 11:03:43 2025 +0100

                Port upstream PRs 11542 and 12505 to runtime4 (#3431)

                fix #11482: random crash in large closure allocation (#11542)

                Co-authored-by: Damien Doligez <[email protected]>

            commit 058c4db
            Author: Mark Shinwell <[email protected]>
            Date:   Tue Jan 14 22:19:32 2025 +0000

                Enable all makearray_dynamic tests on runtime4 (#3470)

            commit ba15ee5
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 20:08:31 2025 +0000

                Vectorize [Ifloatarithmem] (#3452)

                * Add [Isimd_mem] to [Arch.Specific] and emit [addpd] with memory arg

                and similar instructions

                * Vectorize [Ifloatarithmem]

                When the memory alignment is known to be 128-bit (currently, never) emits
                [addpd], otherwise emits a vector load followed by an arithmetic
                instruction.

            commit 9755b39
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 18:57:39 2025 +0000

                Fix CI failure (#3473)

            commit 859949c
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 17:56:53 2025 +0000

                Vectorize [Specific.Istore_int] (#3450)

                Used for array initialization (amd64)

            commit 50f73cb
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 17:38:39 2025 +0000

                Do not allow naked pointers (remove configure option) (#3448)

            commit b7c8ad3
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 17:07:39 2025 +0000

                Vectorizer refactor heuristic for select_and_join (#3449)

                * Refactor [Block.find_last_instruction], cache [Computation.last_pos]

                * Improve heuristics in [Computation.select_and_join] using [last_pos]

            commit 22f81d8
            Author: Mark Shinwell <[email protected]>
            Date:   Tue Jan 14 17:00:29 2025 +0000

                Fix mistake in conditional for makearray_dynamic array initialization (#3466)

            commit aaaddfb
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 14:57:17 2025 +0000

                Vectorizer: propagate alignment of memory accesses (#3451)

                Currently it's always 8 but having this argument will help us
                consider alignment for new vector sequences.

            commit b15d44e
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 14:55:29 2025 +0000

                vectorizer: improve debug printout (#3445)

            commit 6239156
            Author: Stephen Dolan <[email protected]>
            Date:   Tue Jan 14 13:43:21 2025 +0000

                Better hugepage alignment of stacks and heap (#3384)

                Co-authored-by: Mark Shinwell <[email protected]>

            commit 677d79a
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 13:23:56 2025 +0000

                Backend dune copy and directive (#3467)

                * Remove unused line directive from [dune]

                * Use [copy_files#] to copy files from ARCH and add a file directive

                * Remove existing file directives

            commit 314b131
            Author: Stephen Dolan <[email protected]>
            Date:   Tue Jan 14 11:58:41 2025 +0000

                Bound stack size in expect tests (#3439)

            commit 02774f8
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 10:52:50 2025 +0000

                all_deps is reflexive (#3464)

            commit 117a0a0
            Author: Stephen Dolan <[email protected]>
            Date:   Tue Jan 14 10:44:15 2025 +0000

                Stub implementation of new custom memory API (#3437)

            commit 4f30aac
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Jan 14 10:31:48 2025 +0000

                Vectorizer bug fix: address argument of memory operations (#3446)

                Fix bug: use address arg of the first instruction in a group

                ... not the last!

                Only matters for arrays at the moment, where the address offset
                argument is not always the same register.

            commit cc91e2b
            Author: Vincent Laviron <[email protected]>
            Date:   Tue Jan 14 11:26:59 2025 +0100

                caml_update_dummy: fail on closure blocks (#3429)

            commit 17a01a9
            Author: Mark Shinwell <[email protected]>
            Date:   Tue Jan 14 10:07:39 2025 +0000

                Implement %array_element_size_in_bytes (#3367)

                Co-authored-by: Chris Casinghino <[email protected]>

            commit b487f71
            Author: Greta Yorsh <[email protected]>
            Date:   Mon Jan 13 14:05:25 2025 +0000

                Runtime: make types explicit when reading [gc_regs] (#3453)

                Runtime4: make types explicit when reading [gc_regs].

            commit 67e6eb3
            Author: Max Slater <[email protected]>
            Date:   Fri Jan 10 16:17:32 2025 -0500

                More capsule API updates (#3440)

            commit c7f573f
            Author: Mark Shinwell <[email protected]>
            Date:   Fri Jan 10 18:26:15 2025 +0000

                Reinstate %makearray_dynamic (#3460)

            commit e1e4fb8
            Author: Zesen Qian <[email protected]>
            Date:   Fri Jan 10 16:15:48 2025 +0000

                `portable` lazy allows `nonportable` thunk (#3436)

                * portable lazy allows nonportable thunk

                * add documentation

                * improve documentation

                * add examples

                * improve comments in test

                * say "not stronger"

            commit c30ec74
            Author: Ryan Tjoa <[email protected]>
            Date:   Fri Jan 10 10:41:08 2025 -0500

                Check for type recursion without boxing (#3407)

            commit cb290c5
            Author: Greta Yorsh <[email protected]>
            Date:   Fri Jan 10 11:00:32 2025 +0000

                Vectorizer: rename New (#3454)

                Rename New to New_vec128 to make the type clear

                and distinguish it from the upcoming Valx2

            commit bd39e02
            Author: Greta Yorsh <[email protected]>
            Date:   Fri Jan 10 10:24:48 2025 +0000

                Add function [DLL.for_all_i] (#3442)

                * Add function [DLL.for_all_i]

                * Rename to [for_alli] to match existing [mapi] and [iteri]

                * Remove unused argument of [aux] in [DLL.for_all*]

            commit c048920
            Author: Greta Yorsh <[email protected]>
            Date:   Thu Jan 9 13:16:36 2025 +0000

                Cleanup machtype_component size (#3441)

                Cleanup size_component

            commit 830d5e7
            Author: Greta Yorsh <[email protected]>
            Date:   Thu Jan 9 13:15:59 2025 +0000

                Add "dump-vectorize" to OCAMLPARAM (#3443)

                Add [dump-vectorize] to OCAMLPARAM for debugging

            commit 157c95e
            Author: Greta Yorsh <[email protected]>
            Date:   Thu Jan 9 13:15:33 2025 +0000

                Vectorizer bug fix: 128-bit vectorized constant   (#3447)

                Fix bug: 128-bit vectorized constant high/low correctly ordered

            commit 648155d
            Author: Greta Yorsh <[email protected]>
            Date:   Thu Jan 9 13:09:48 2025 +0000

                Add [Printreg.reglist] for debugging (#3444)

            commit d40254f
            Author: Stephen Dolan <[email protected]>
            Date:   Tue Jan 7 21:25:45 2025 +0000

                Move two misplaced files (#3435)

            commit 4a0bb69
            Author: dkalinichenko-js <[email protected]>
            Date:   Tue Jan 7 15:34:27 2025 -0500

                `Yielding` mode axis (#3283)

                * `Yielding` mode axis

                * Tests

                * fix printing

                ---------

                Co-authored-by: Diana Kalinichenko <[email protected]>

            commit 00275e0
            Author: Max Slater <[email protected]>
            Date:   Mon Jan 6 13:05:58 2025 -0500

                Unbox_float32 should check custom ops name (#3433)

                check sym name

            commit 2e49469
            Author: Max Slater <[email protected]>
            Date:   Mon Jan 6 13:05:03 2025 -0500

                Make Capsule preserve wrapped exception backtraces (#3421)

                * with_password

                * portable

                * don't use polymorphic parameters

                * review

                * protect encapsulated from other capsule

                * raise wrapped exceptions with existing backtrace

                * cr

            commit 2de23a5
            Author: Ryan Tjoa <[email protected]>
            Date:   Mon Jan 6 04:04:29 2025 -0500

                Fix CI by using `setup-ocaml` v3 for ocamlformat workflow (#3426)

                [CI] Use setup-ocaml v3 for ocamlformat workflow

            commit eada0f1
            Author: Ryan Tjoa <[email protected]>
            Date:   Fri Jan 3 21:23:23 2025 -0500

                Move unboxed records to stable (#3419)

            commit a273a33
            Author: Jacob Van Buren <[email protected]>
            Date:   Fri Jan 3 11:17:18 2025 -0500

                Changed make fmt to run in parallel (#3422)

                changed make fmt to run in parallel

            commit 4de5a72
            Author: Max Slater <[email protected]>
            Date:   Thu Jan 2 20:10:08 2025 -0500

                Add `Capsule.with_password` (#3420)

            commit b084ff3
            Author: Greta Yorsh <[email protected]>
            Date:   Wed Jan 1 15:34:11 2025 +0000

                vectorizer: new test (#3418)

                Add test for register compatibility

            commit 5549015
            Author: Greta Yorsh <[email protected]>
            Date:   Tue Dec 31 17:20:56 2024 +0000

                Vectorizer: check register compatibility (#3412)

                Check that registers are compatible when joining computations
  • Loading branch information
jvanburen committed Jan 22, 2025
1 parent 1b4978a commit 39828c2
Show file tree
Hide file tree
Showing 325 changed files with 27,359 additions and 3,715 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ jobs:
config: --enable-middle-end=flambda2 --disable-warn-error
os: macos-latest

- name: flambda2_macos_arm64_irc
config: --enable-middle-end=flambda2 --disable-warn-error
- name: flambda2_macos_arm64_runtime5_irc
config: --enable-middle-end=flambda2 --enable-runtime5 --disable-warn-error
os: macos-latest
build_ocamlparam: '_,w=-46,regalloc=irc'
ocamlparam: '_,w=-46,regalloc=irc'

- name: flambda2_macos_arm64_ls
config: --enable-middle-end=flambda2 --disable-warn-error
- name: flambda2_macos_arm64_runtime5_ls
config: --enable-middle-end=flambda2 --enable-runtime5 --disable-warn-error
os: macos-latest
build_ocamlparam: '_,w=-46,regalloc=ls'
ocamlparam: '_,w=-46,regalloc=ls'
Expand Down Expand Up @@ -282,22 +282,22 @@ jobs:
run: |
PATH=$GITHUB_WORKSPACE/ocaml-414/_install/bin:$PATH make check_all_arches
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ failure() }} && matrix.os != 'macos-latest'
with:
name: cores
name: cores-${{ github.sha }}
path: /cores

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ failure() }} && matrix.os != 'macos-latest'
with:
name: _build
name: _build-${{ github.sha }}
path: $GITHUB_WORKSPACE/_build

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ failure() }} && matrix.os != 'macos-latest'
with:
name: _runtest
name: _runtest-${{ github.sha }}
path: $GITHUB_WORKSPACE/_runtest

concurrency:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ jobs:
# BUILD_OCAMLPARAM: ${{ matrix.ocamlparam }}
#
# - name: Publish coverage report
# uses: actions/upload-artifact@v3
# uses: actions/upload-artifact@v4
# with:
# name: coverage
# name: coverage-${{ github.sha }}
# path: flambda_backend/_coverage/**
#
2 changes: 1 addition & 1 deletion .github/workflows/ocamlformat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
path: 'flambda_backend'

- name: Setup OCaml ${{ matrix.ocaml-compiler }}
uses: ocaml/setup-ocaml@v2
uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: ${{ matrix.ocaml-compiler }}

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ promote:

.PHONY: fmt
fmt:
ocamlformat -i $$(find . \( -name "*.ml" -or -name "*.mli" \))
find . \( -name "*.ml" -or -name "*.mli" \) | xargs -P $$(nproc 2>/dev/null || echo 1) -n 20 ocamlformat -i

.PHONY: check-fmt
check-fmt:
Expand Down
12 changes: 12 additions & 0 deletions asmcomp/asmlink.ml
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,16 @@ let sourcefile_for_dwarf ~named_startup_file filename =
if named_startup_file then filename
else ".startup"

(* Compile a Cmm data phrase that defines the global symbol
   [caml_ocamlrunparam]. Its contents are the current value of
   [Clflags.ocamlrunparam] with an explicit NUL byte appended
   (presumably so it can be read as a C string -- TODO confirm against the
   consumer of this symbol). [ppf_dump] is passed straight through to
   [Asmgen.compile_phrase]. *)
let emit_ocamlrunparam ~ppf_dump =
Asmgen.compile_phrase ~ppf_dump
(Cmm.Cdata [
Cmm.Cdefine_symbol {
sym_name = "caml_ocamlrunparam";
sym_global = Global
};
Cmm.Cstring (!Clflags.ocamlrunparam ^ "\000")
])

let make_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units cached_gen =
Location.input_name := "caml_startup"; (* set name of "current" input *)
let startup_comp_unit =
Expand All @@ -361,6 +371,7 @@ let make_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units cached_g
let compile_phrase p = Asmgen.compile_phrase ~ppf_dump p in
let name_list =
List.flatten (List.map (fun u -> u.defines) units) in
emit_ocamlrunparam ~ppf_dump;
List.iter compile_phrase (Cmm_helpers.entry_point name_list);
List.iter compile_phrase
(* Emit the GC roots table, for dynlink. *)
Expand Down Expand Up @@ -414,6 +425,7 @@ let make_shared_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units =
Emitaux.Dwarf_helpers.init ~disable_dwarf:(not !Dwarf_flags.dwarf_for_startup_file)
~sourcefile:sourcefile_for_dwarf;
Emit.begin_assembly unix;
emit_ocamlrunparam ~ppf_dump;
List.iter compile_phrase
(Cmm_helpers.emit_gc_roots_table ~symbols:[]
(Generic_fns.compile ~shared:true genfns));
Expand Down
19 changes: 12 additions & 7 deletions backend/amd64/CSE.ml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# 2 "backend/amd64/CSE.ml"
(**************************************************************************)
(* *)
(* OCaml *)
Expand All @@ -21,6 +20,12 @@ open Arch
open Mach
open CSE_utils

(* Translate a [Simd.operation_class] into the operation class used by CSE:
   [Pure] becomes [Op_pure], and [Load] becomes [Op_load] with [Mutable] or
   [Immutable] chosen from its [is_mutable] flag. The match lists every
   constructor visible here, with no catch-all case. *)
let of_simd_class (cl : Simd.operation_class) =
match cl with
| Pure -> Op_pure
| Load { is_mutable = true } -> Op_load Mutable
| Load { is_mutable = false } -> Op_load Immutable

class cse = object

inherit CSEgen.cse_generic as super
Expand All @@ -37,9 +42,9 @@ method! class_of_operation op =
| Irdtsc | Irdpmc
| Ilfence | Isfence | Imfence -> Op_other
| Isimd op ->
begin match Simd.class_of_operation op with
| Pure -> Op_pure
end
of_simd_class (Simd.class_of_operation op)
| Isimd_mem (op,_addr) ->
of_simd_class (Simd.Mem.class_of_operation op)
| Ipause
| Icldemote _
| Iprefetch _ -> Op_other
Expand Down Expand Up @@ -81,9 +86,9 @@ class cfg_cse = object
| Irdtsc | Irdpmc
| Ilfence | Isfence | Imfence -> Op_other
| Isimd op ->
begin match Simd.class_of_operation op with
| Pure -> Op_pure
end
of_simd_class (Simd.class_of_operation op)
| Isimd_mem (op,_addr) ->
of_simd_class (Simd.Mem.class_of_operation op)
| Ipause
| Icldemote _
| Iprefetch _ -> Op_other
Expand Down
19 changes: 14 additions & 5 deletions backend/amd64/arch.ml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# 2 "backend/amd64/arch.ml"
(**************************************************************************)
(* *)
(* OCaml *)
Expand Down Expand Up @@ -153,6 +152,9 @@ type specific_operation =
| Imfence (* memory fence *)
| Ipause (* hint for spin-wait loops *)
| Isimd of Simd.operation (* SIMD instruction set operations *)
| Isimd_mem of Simd.Mem.operation * addressing_mode
(* SIMD instruction set operations
with memory args *)
| Icldemote of addressing_mode (* hint to demote a cacheline to L3 *)
| Iprefetch of (* memory prefetching hint *)
{ is_write: bool;
Expand Down Expand Up @@ -273,6 +275,8 @@ let print_specific_operation printreg op ppf arg =
fprintf ppf "rdpmc %a" printreg arg.(0)
| Isimd simd ->
Simd.print_operation printreg simd ppf arg
| Isimd_mem (simd, addr) ->
Simd.Mem.print_operation printreg (print_addressing printreg addr) simd ppf arg
| Ipause ->
fprintf ppf "pause"
| Icldemote _ ->
Expand All @@ -299,13 +303,14 @@ let operation_is_pure = function
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Icldemote _ | Iprefetch _ -> false
| Isimd op -> Simd.is_pure op
| Isimd_mem (op, _addr) -> Simd.Mem.is_pure op

(* Specific operations that can raise *)
(* Keep in sync with [Vectorize_specific] *)
let operation_can_raise = function
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
| Ifloatarithmem _
| Irdtsc | Irdpmc | Ipause | Isimd _
| Irdtsc | Irdpmc | Ipause | Isimd _ | Isimd_mem _
| Ilfence | Isfence | Imfence
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Icldemote _ | Iprefetch _ -> false
Expand All @@ -314,7 +319,7 @@ let operation_can_raise = function
let operation_allocates = function
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
| Ifloatarithmem _
| Irdtsc | Irdpmc | Ipause | Isimd _
| Irdtsc | Irdpmc | Ipause | Isimd _ | Isimd_mem _
| Ilfence | Isfence | Imfence
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Icldemote _ | Iprefetch _ -> false
Expand Down Expand Up @@ -405,9 +410,11 @@ let equal_specific_operation left right =
&& equal_addressing_mode left_addr right_addr
| Isimd l, Isimd r ->
Simd.equal_operation l r
| Isimd_mem (l,al), Isimd_mem (r,ar) ->
Simd.Mem.equal_operation l r && equal_addressing_mode al ar
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ |
Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence |
Ipause | Isimd _ | Icldemote _ | Iprefetch _), _ ->
Ipause | Isimd _ | Isimd_mem _ | Icldemote _ | Iprefetch _), _ ->
false

(* addressing mode functions *)
Expand Down Expand Up @@ -512,7 +519,9 @@ let isomorphic_specific_operation op1 op2 =
&& equal_addressing_mode_without_displ left_addr right_addr
| Isimd l, Isimd r ->
Simd.equal_operation l r
| Isimd_mem (l,al), Isimd_mem (r,ar) ->
Simd.Mem.equal_operation l r && equal_addressing_mode_without_displ al ar
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ |
Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence |
Ipause | Isimd _ | Icldemote _ | Iprefetch _), _ ->
Ipause | Isimd _ | Isimd_mem _ | Icldemote _ | Iprefetch _), _ ->
false
4 changes: 3 additions & 1 deletion backend/amd64/arch.mli
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# 2 "asmcomp/amd64/arch.mli"
(**************************************************************************)
(* *)
(* OCaml *)
Expand Down Expand Up @@ -86,6 +85,9 @@ type specific_operation =
| Imfence (* memory fence *)
| Ipause (* hint for spin-wait loops *)
| Isimd of Simd.operation (* SIMD instruction set operations *)
| Isimd_mem of Simd.Mem.operation * addressing_mode
(* SIMD instruction set operations
with memory args *)
| Icldemote of addressing_mode (* hint to demote a cacheline to L3 *)
| Iprefetch of (* memory prefetching hint *)
{ is_write: bool;
Expand Down
19 changes: 16 additions & 3 deletions backend/amd64/cfg_selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,17 @@ let pseudoregs_for_operation op arg res =
| Intop (Iadd | Isub | Imul | Iand | Ior | Ixor)
| Floatop ((Float32 | Float64), (Iaddf | Isubf | Imulf | Idivf)) ->
[| res.(0); arg.(1) |], res
| Intop_atomic { op = Compare_and_swap; size = _; addr = _ } ->
| Intop_atomic { op = Compare_set; size = _; addr = _ } ->
(* first arg must be rax *)
let arg = Array.copy arg in
arg.(0) <- rax;
arg, res
| Intop_atomic { op = Fetch_and_add; size = _; addr = _ } ->
| Intop_atomic { op = Compare_exchange; size = _; addr = _ } ->
(* first arg must be rax, res.(0) must be rax. *)
let arg = Array.copy arg in
arg.(0) <- rax;
arg, [| rax |]
| Intop_atomic { op = Exchange | Fetch_and_add; size = _; addr = _ } ->
(* first arg must be the same as res.(0) *)
let arg = Array.copy arg in
arg.(0) <- res.(0);
Expand Down Expand Up @@ -86,14 +91,22 @@ let pseudoregs_for_operation op arg res =
edx (high) and eax (low). Make it simple and force the argument in rcx,
and rax and rdx clobbered *)
[| rcx |], res
| Specific (Isimd op) -> Simd_selection.pseudoregs_for_operation op arg res
| Specific (Isimd op) ->
Simd_selection.pseudoregs_for_operation
(Simd_proc.register_behavior op)
arg res
| Specific (Isimd_mem (op, _addr)) ->
Simd_selection.pseudoregs_for_operation
(Simd_proc.Mem.register_behavior op)
arg res
| Csel _ ->
(* last arg must be the same as res.(0) *)
let len = Array.length arg in
let arg = Array.copy arg in
arg.(len - 1) <- res.(0);
arg, res
(* Other instructions are regular *)
| Intop_atomic { op = Add | Sub | Land | Lor | Lxor; _ }
| Intop (Ipopcnt | Iclz _ | Ictz _ | Icomp _)
| Intop_imm ((Imulh _ | Idiv | Imod | Icomp _ | Ipopcnt | Iclz _ | Ictz _), _)
| Specific
Expand Down
Loading

0 comments on commit 39828c2

Please sign in to comment.