diff --git a/.github/workflows/testing-gcc.yml b/.github/workflows/testing-gcc.yml
index e60a886afa4..706a0989e85 100644
--- a/.github/workflows/testing-gcc.yml
+++ b/.github/workflows/testing-gcc.yml
@@ -34,6 +34,7 @@ jobs:
 
     - name: Test
       run: |
+        export LSAN_OPTIONS=suppressions=$PWD/tools/docker/lsan.supp
         cd build
         ctest --output-on-failure
 
diff --git a/.github/workflows/testing-macos.yml b/.github/workflows/testing-macos.yml
index 5d32ea8b48a..12cf73a9421 100644
--- a/.github/workflows/testing-macos.yml
+++ b/.github/workflows/testing-macos.yml
@@ -6,6 +6,10 @@ on:
     - 'develop'
   pull_request:
 
+# Work around an issue in Xcode 14.1/2 by pinning Xcode 14.0.1
+env:
+  DEVELOPER_DIR: /Applications/Xcode_14.0.1.app/Contents/Developer
+
 jobs:
   build-and-test:
     runs-on: macos-latest
@@ -16,7 +20,7 @@ jobs:
         use_openmp: [OPENMP=ON]
         use_smm: [SMM=blas]
         blas_impl: [accelerate,openblas]
-        mpi_suffix: [openmpi,mpich]
+        mpi_suffix: [openmpi]
         exclude:
           - use_mpi: MPI=OFF
             mpi_suffix: mpich
@@ -27,19 +31,14 @@ jobs:
         fetch-depth: 0
         submodules: true
 
-    - name: Install dependencies
+    - name: Install common dependencies
       run: |
         env HOMEBREW_NO_AUTO_UPDATE=1 brew install \
-          ninja \
-          openmpi
-
-    - name: Unlink OpenMPI
-      run: |
-        brew unlink openmpi
+          ninja
 
-    - name: Install MPICH
+    - name: Install ${{ matrix.mpi_suffix }}
       run: |
-        env HOMEBREW_NO_AUTO_UPDATE=1 brew install mpich
+        env HOMEBREW_NO_AUTO_UPDATE=1 brew install ${{ matrix.mpi_suffix }}
 
     - name: Configure
       run: |
@@ -53,7 +52,6 @@ jobs:
           -DUSE_${{ matrix.use_openmp }} \
           -DUSE_${{ matrix.use_smm }} \
           $([ "${{ matrix.blas_impl }}" = "openblas" ] && echo '-DCMAKE_PREFIX_PATH=/usr/local/opt/openblas') \
-          -DMPIEXEC_EXECUTABLE="$([ "${{ matrix.mpi_suffix }}" = "openmpi" ] && command -v /usr/local/Cellar/open-mpi/*/bin/mpiexec || command -v /usr/local/Cellar/mpich/*/bin/mpiexec)" \
           -DMPIEXEC_PREFLAGS="$([ "${{ matrix.mpi_suffix }}" = "openmpi" ] && echo "-mca btl ^openib --allow-run-as-root")" \
           -DTEST_MPI_RANKS=1 \
           ..
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9b8f82a6d71..1def9bcfb38 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,10 +25,18 @@ repos:
   - id: check-yaml
   - id: check-symlinks
   - id: trailing-whitespace
+    exclude: >-
+      (?x)^(
+        tools/vecLibFort/.*|
+      )$
 - repo: https://github.com/pseewald/fprettify
   rev: v0.3.7
   hooks:
   - id: fprettify
+    exclude: >-
+      (?x)^(
+        tools/vecLibFort/.*|
+      )$
 - repo: https://github.com/cheshirekow/cmake-format-precommit
   rev: v0.6.13
   hooks:
@@ -64,3 +72,8 @@ repos:
     files: \.(c|cc|cxx|cpp|cl|frag|glsl|h|hpp|hxx|ih|ispc|ipp|java|js|m|mm|proto|textproto|vert)$
     args: ['-i', '-fallback-style=none', '--style=file']
     additional_dependencies: ['clang-format']
+    exclude: >-
+      (?x)^(
+        tools/vecLibFort/.*|
+      )$
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8a174910f21..d1dd70b41c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -163,6 +163,23 @@ endif ()
 find_package(LAPACK REQUIRED) # needed for some of the integrated test routines,
                               # also calls find_package(BLAS)
 
+if (APPLE
+    AND (BLAS_LIBRARIES MATCHES "Accelerate"
+         OR BLAS_LIBRARIES MATCHES "vecLib" # automated search
+         OR BLA_VENDOR STREQUAL "Accelerate"
+         OR BLA_VENDOR STREQUAL "NAS" # user override
+        ))
+  message(CHECK_START "Looking for vecLibFort library")
+  find_library(VECLIBFORT_LIBRARY vecLibFort)
+  if (NOT VECLIBFORT_LIBRARY)
+    message(CHECK_FAIL "not found, building it")
+    add_subdirectory(tools/vecLibFort)
+    set(VECLIBFORT_LIBRARY vecLibFort)
+  else ()
+    message(CHECK_PASS "found at " ${VECLIBFORT_LIBRARY})
+  endif ()
+endif ()
+
 # =================================== Python this module looks preferably for
 # version 3 of Python. If not found, version 2 is searched. In CMake 3.15, if a
 # python virtual environment is activated, it will search the virtual
diff --git a/docs/guide/2-user-guide/1-installation/index.md b/docs/guide/2-user-guide/1-installation/index.md
index fc06dbb2b86..315edf4438a 100644
--- a/docs/guide/2-user-guide/1-installation/index.md
+++ b/docs/guide/2-user-guide/1-installation/index.md
@@ -9,8 +9,12 @@ You need:
 * [CMake](https://cmake.org/) (3.22+)
 * GNU make or Ninja
 * Fortran compiler which supports at least Fortran 2008 (including the TS 29113 when using the C-bindings)
-* BLAS+LAPACK implementation (reference, OpenBLAS and MKL have been tested. Note: DBCSR linked to OpenBLAS 0.3.6 gives wrong results on Power9 architectures.)
-* Python version installed (2.7 or 3.6+ have been tested)
+* BLAS+LAPACK implementation
+    * Reference BLAS/LAPACK, OpenBLAS and MKL have been tested and can be considered supported.
+    * On macOS [vecLibFort](https://github.com/mcg1969/vecLibFort) is required to use Accelerate and/or vecLib.
+      The build system will automatically build a bundled version if not found on the system.
+    * DBCSR linked to OpenBLAS 0.3.6 gives wrong results on Power9 architectures.
+* Python (version 3.6+ has been tested)
 
 Optional:
 
diff --git a/docs/guide/3-developer-guide/3-programming/1-overview/index.md b/docs/guide/3-developer-guide/3-programming/1-overview/index.md
index 087667bbce9..88d37a270f9 100644
--- a/docs/guide/3-developer-guide/3-programming/1-overview/index.md
+++ b/docs/guide/3-developer-guide/3-programming/1-overview/index.md
@@ -46,7 +46,6 @@ Assumed square matrix with 20x20 matrix with 5x5 blocks and a 2x2 processor grid
 | `__NO_STATM_ACCESS`, `__STATM_RESIDENT` or `__STATM_TOTAL` | Toggle memory usage reporting between resident memory and total memory. In particular, macOS users must use `-D__NO_STATM_ACCESS` | Fortran |
 | `__NO_ABORT` | Avoid calling abort, but STOP instead (useful for coverage testing, and to avoid core dumps on some systems) | Fortran |
 | `__LIBXSMM` | Enable [LIBXSMM](https://github.com/hfp/libxsmm/) link for optimized small matrix multiplications on CPU | Fortran |
-| `__ACCELERATE` | Must be defined on macOS when Apple's Accelerate framework is used for BLAS and LAPACK (this is due to some interface incompatibilities between Accelerate and reference BLAS/LAPACK) | Fortran |
 | `NDEBUG`       | Assertions are stripped ("compiled out"), `NDEBUG` is the ANSI-conforming symbol name (not `__NDEBUG`). Regular release builds may carry assertions for safety | Fortran, C, C++ |
 | `__CRAY_PM_ACCEL_ENERGY` or `__CRAY_PM_ENERGY` | Switch on collectin energy profiling on Cray systems | Fortran |
 | `__DBCSR_ACC` | Enable Accelerator compilation | Fortran, C, C++ |
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 42934e9b0fa..951c510b86c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -176,8 +176,8 @@ if (APPLE)
   # fix /proc/self/statm can not be opened on macOS
   target_compile_definitions(dbcsr PRIVATE __NO_STATM_ACCESS)
 
-  if (BLAS_LIBRARIES MATCHES "Accelerate")
-    target_compile_definitions(dbcsr PRIVATE __ACCELERATE)
+  if (VECLIBFORT_LIBRARY)
+    target_link_libraries(dbcsr PRIVATE ${VECLIBFORT_LIBRARY})
   endif ()
 endif ()
 
@@ -243,6 +243,7 @@ if (USE_ACCEL)
   target_link_libraries(
     dbcsr
     PRIVATE $<$<STREQUAL:${USE_ACCEL},cuda>:CUDA::cudart>
+            $<$<STREQUAL:${USE_ACCEL},cuda>:CUDA::cuda_driver>
             $<$<STREQUAL:${USE_ACCEL},cuda>:CUDA::cublas>
             $<$<STREQUAL:${USE_ACCEL},cuda>:CUDA::nvrtc>
             $<$<BOOL:${WITH_CUDA_PROFILING}>:CUDA::nvToolsExt>
diff --git a/src/acc/hip/acc_hip.h b/src/acc/hip/acc_hip.h
index dc4f255fd9e..33800c01eef 100644
--- a/src/acc/hip/acc_hip.h
+++ b/src/acc/hip/acc_hip.h
@@ -12,7 +12,11 @@
 
 #include <hip/hip_runtime.h>
 #include <hip/hip_runtime_api.h>
-#include <hipblas.h>
+#if __has_include(<hipblas/hipblas.h>)
+#  include <hipblas/hipblas.h>
+#else
+#  include <hipblas.h>
+#endif
 #include <hip/hiprtc.h>
 
 #define ACC(x) hip##x
diff --git a/src/mm/dbcsr_mm_common.F b/src/mm/dbcsr_mm_common.F
index 937043f23e9..e12a4fbb455 100644
--- a/src/mm/dbcsr_mm_common.F
+++ b/src/mm/dbcsr_mm_common.F
@@ -579,11 +579,7 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, &
          INTEGER                                  :: blk, bp, bpe, row, col
 
          REAL(KIND=real_8), EXTERNAL              :: DDOT
-#if defined (__ACCELERATE)
-         REAL(KIND=real_8), EXTERNAL              :: SDOT
-#else
          REAL(KIND=real_4), EXTERNAL              :: SDOT
-#endif
 
 !   ---------------------------------------------------------------------------
 
diff --git a/src/mm/dbcsr_mm_multrec.F b/src/mm/dbcsr_mm_multrec.F
index d8d0420f782..35c77b938de 100644
--- a/src/mm/dbcsr_mm_multrec.F
+++ b/src/mm/dbcsr_mm_multrec.F
@@ -707,11 +707,7 @@ SUBROUTINE multrec_filtering_${nametype1}$ (filter_eps, nblks, rowi, coli, blkp,
          REAL(kind=real_8)                          :: nrm
 
          REAL(KIND=real_8), EXTERNAL                :: DZNRM2, DDOT
-#if defined (__ACCELERATE)
-         REAL(KIND=real_8), EXTERNAL                :: SCNRM2, SDOT
-#else
          REAL(KIND=real_4), EXTERNAL                :: SCNRM2, SDOT
-#endif
 
          REAL(kind=real_8)                          :: filter_eps_opt
 
diff --git a/src/ops/dbcsr_operations.F b/src/ops/dbcsr_operations.F
index 9f5bd5a1747..d16e5c1130c 100644
--- a/src/ops/dbcsr_operations.F
+++ b/src/ops/dbcsr_operations.F
@@ -1910,11 +1910,7 @@ SUBROUTINE dbcsr_filter_anytype(matrix, eps, method, &
       TYPE(dbcsr_iterator)                               :: iter
 
       REAL(KIND=real_8), EXTERNAL                        :: DZNRM2
-#if defined (__ACCELERATE)
-      REAL(KIND=real_8), EXTERNAL                        :: SCNRM2
-#else
       REAL(KIND=real_4), EXTERNAL                        :: SCNRM2
-#endif
 
 !   ---------------------------------------------------------------------------
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 6be544b1fd1..2b050add41d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -95,23 +95,20 @@ set(dbcsr_unittest_common_SRCS dbcsr_test_add.F dbcsr_test_multiply.F)
 # instead of building a full-blown lib, it would be better to simply build an
 # OBJECT lib, but we would need cmake 3.12 to be able to specify
 # target_link_libraries on those to get the proper compile flags
-add_library(dbcsr_unittest_common STATIC ${dbcsr_unittest_common_SRCS})
+add_library(dbcsr_unittest_common OBJECT ${dbcsr_unittest_common_SRCS})
+target_link_libraries(dbcsr_unittest_common PUBLIC dbcsr)
 target_link_libraries(dbcsr_unittest_common PUBLIC ${BLAS_LIBRARIES}
                                                    ${LAPACK_LIBRARIES})
 if (OpenMP_FOUND)
   target_link_libraries(dbcsr_unittest_common PUBLIC OpenMP::OpenMP_Fortran)
 endif ()
 
-if (APPLE AND BLAS_LIBRARIES MATCHES "Accelerate")
-  target_compile_definitions(dbcsr_unittest_common PRIVATE __ACCELERATE)
-endif ()
-target_link_libraries(dbcsr_unittest_common PUBLIC dbcsr)
-
 # Compile Fortran tests
 foreach (dbcsr_test ${DBCSR_TESTS_FTN})
   add_executable(${dbcsr_test} ${${dbcsr_test}_SRCS})
-  target_link_libraries(${dbcsr_test} dbcsr_unittest_common)
+  target_link_libraries(${dbcsr_test} PUBLIC dbcsr_unittest_common)
   set_target_properties(${dbcsr_test} PROPERTIES LINKER_LANGUAGE Fortran)
+
   # register unittest executable with CMake
   if (USE_MPI)
     separate_arguments(MPIEXEC_PREFLAGS)
@@ -124,7 +121,6 @@ foreach (dbcsr_test ${DBCSR_TESTS_FTN})
     add_test(NAME ${dbcsr_test} COMMAND ${dbcsr_test})
   endif ()
   if (OpenMP_FOUND)
-    target_link_libraries(${dbcsr_test} OpenMP::OpenMP_Fortran)
     set_tests_properties(
       ${dbcsr_test} PROPERTIES ENVIRONMENT OMP_NUM_THREADS=${TEST_OMP_THREADS})
   endif ()
diff --git a/tests/dbcsr_test_add.F b/tests/dbcsr_test_add.F
index 692f9c914d7..30fe02679d3 100644
--- a/tests/dbcsr_test_add.F
+++ b/tests/dbcsr_test_add.F
@@ -377,11 +377,7 @@ SUBROUTINE dbcsr_check_add(test_name, matrix_a, dense_a_dbcsr, dense_a, dense_b,
 
       LOGICAL                                            :: valid
       REAL(real_4), ALLOCATABLE, DIMENSION(:)            :: work_sp
-#if defined (__ACCELERATE)
-      REAL(real_8), EXTERNAL                             :: clange, slamch, slange
-#else
       REAL(real_4), EXTERNAL                             :: clange, slamch, slange
-#endif
       REAL(real_8)                                       :: a_norm_dbcsr, a_norm_in, a_norm_out, &
                                                             b_norm, eps, residual
       REAL(real_8), ALLOCATABLE, DIMENSION(:)            :: work
diff --git a/tests/dbcsr_test_multiply.F b/tests/dbcsr_test_multiply.F
index 96081a15272..d36474e3b9f 100644
--- a/tests/dbcsr_test_multiply.F
+++ b/tests/dbcsr_test_multiply.F
@@ -553,11 +553,7 @@ SUBROUTINE dbcsr_check_multiply(test_name, matrix_c, dense_c_dbcsr, dense_a, den
 
       LOGICAL                                            :: valid
       REAL(real_4), ALLOCATABLE, DIMENSION(:)            :: work_sp
-#if defined (__ACCELERATE)
-      REAL(real_8), EXTERNAL                             :: clange, slamch, slange
-#else
       REAL(real_4), EXTERNAL                             :: clange, slamch, slange
-#endif
       REAL(real_8)                                       :: a_norm, b_norm, c_norm_dbcsr, c_norm_in, &
                                                             c_norm_out, eps, eps_norm, residual
       REAL(real_8), ALLOCATABLE, DIMENSION(:)            :: work
diff --git a/tools/docker/lsan.supp b/tools/docker/lsan.supp
index 028f0a11168..4cd8c021695 100644
--- a/tools/docker/lsan.supp
+++ b/tools/docker/lsan.supp
@@ -1,3 +1,5 @@
 # leak due to compiler bug triggered by combination of OOP and ALLOCATABLE
 leak:__dbcsr_tensor_types_MOD___copy_dbcsr_tensor_types_Dbcsr_tas_dist_t
 leak:__dbcsr_tensor_types_MOD___copy_dbcsr_tensor_types_Dbcsr_tas_blk_size_t
+# similar case, for gcc-13+
+leak:__dbcsr_tas_global_MOD___copy_dbcsr_tas_global_Dbcsr_tas_blk_size_arb
diff --git a/tools/vecLibFort/CMakeLists.txt b/tools/vecLibFort/CMakeLists.txt
new file mode 100644
index 00000000000..a36b6e15508
--- /dev/null
+++ b/tools/vecLibFort/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_library(vecLibFort STATIC vecLibFort.c)
+
+if (CMAKE_C_COMPILER_ID STREQUAL "GNU")
+  target_compile_options(vecLibFort PRIVATE -flax-vector-conversions)
+endif ()
+
+install(
+  TARGETS vecLibFort
+  EXPORT DBCSRTargets
+  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
diff --git a/tools/vecLibFort/LICENSE b/tools/vecLibFort/LICENSE
new file mode 100644
index 00000000000..36b7cd93cdf
--- /dev/null
+++ b/tools/vecLibFort/LICENSE
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/tools/vecLibFort/Makefile b/tools/vecLibFort/Makefile
new file mode 100644
index 00000000000..507d894e37e
--- /dev/null
+++ b/tools/vecLibFort/Makefile
@@ -0,0 +1,49 @@
+PREFIX=/usr/local
+LIBDIR=$(PREFIX)/lib
+
+CFLAGS=-O
+
+NAME=vecLibFort
+SOURCE=$(NAME).c
+OBJECT=$(NAME).o
+LIBRARY=lib$(NAME)
+STATIC=$(LIBRARY).a
+DYNAMIC=$(LIBRARY).dylib
+PRELOAD=$(LIBRARY)I.dylib
+INCLUDES=cloak.h static.h
+DEPEND=$(INCLUDES) Makefile
+
+all: static dynamic preload
+static: $(STATIC)
+dynamic: $(DYNAMIC)
+preload: $(PRELOAD)
+
+$(OBJECT): $(DEPEND)
+
+$(STATIC): $(OBJECT)
+	ar -cru $@ $^
+	ranlib $@
+
+$(DYNAMIC): $(OBJECT)
+	clang -shared -o $@ $^ \
+		-Wl,-reexport_framework -Wl,Accelerate \
+		-install_name $(LIBDIR)/$@
+
+$(PRELOAD): $(SOURCE) $(DEPEND)
+	clang -shared $(CFLAGS) -DVECLIBFORT_INTERPOSE -o $@ -O $(SOURCE) \
+		-Wl,-reexport_framework -Wl,Accelerate \
+		-install_name $(LIBDIR)/$@
+
+install: all
+	mkdir -p $(LIBDIR)
+	cp -f $(STATIC) $(LIBDIR)
+	cp -f $(DYNAMIC) $(LIBDIR)
+	cp -f $(PRELOAD) $(LIBDIR)
+
+clean:
+	rm -f $(OBJECT) $(STATIC) $(DYNAMIC) $(PRELOAD)
+
+check: tester.f90 $(OBJECT)
+	gfortran -o tester -O $^ -framework Accelerate 
+	./tester
+
diff --git a/tools/vecLibFort/README.md b/tools/vecLibFort/README.md
new file mode 100644
index 00000000000..a0149458c2a
--- /dev/null
+++ b/tools/vecLibFort/README.md
@@ -0,0 +1,212 @@
+## A GNU Fortran interface to Apple's Accelerate/vecLib BLAS/LAPACK
+
+### Introduction
+
+vecLibFort is a lightweight but flexible "shim" designed to rectify
+the incompatibilities between the Accelerate/vecLib BLAS and LAPACK libraries
+shipped with Mac OS X and FORTRAN code compiled with modern compilers
+such as [GNU Fortran][].
+
+You *will* want this code if you are...
+
+  * compiling your code directly from FORTRAN source; *and*
+  * using Apple's BLAS and/or LAPACK for your linear algebra; *and*
+  * using single-precision or complex arithmetic. 
+
+You *will not* need this code if you are...
+
+  * using some other linear algebra package; *or*
+  * calling BLAS and LAPACK only from C; *or*
+  * using an alternative BLAS/LAPACK package ([OpenBlas][],[MKL][]); *or*
+  * using only double-precision real arithmetic.
+
+You *may* want this code if you are...
+
+  * running a *pre-compiled* program, or linking to a *pre-compiled*
+    library, that seems to exhibit bugs described in the [next](#background)
+    section. See the section [Preloaded (interposing) library](#preloaded) 
+    for more details on how you may be able to fix these programs without
+    recompilation.
+
+<a name="background"></a>
+### Background
+
+[Apple's vecLib framework][vecLib] provides both C and FORTRAN bindings for
+BLAS and LAPACK, the de-facto standard libraries for dense numerical linear
+algebra. Because there remains quite a bit of useful FORTRAN code out there
+that in turn depend on BLAS and LAPACK, this is certainly a welcome provision
+from Apple.
+
+Unfortunately, those FORTRAN bindings follow an [F2C][]-style return value
+convention, while [GNU Fortran][] uses a [different convention][gnufarg]. Most
+subroutines and functions work without modification; in particular, if you 
+rely solely on double-precision *real* arithmetic, you are fine. For single
+precision or complex arithmetic, there are two fatal incompatibilities:
+
+* Functions whose FORTRAN specifications call for returning single-precision
+  real values, such as ``sdot_`` and ``snrm2_``, actually return 
+  *double-precision* results in the Apple/F2C calling convention. GNU Fortran, 
+  on the other hand, expects to receive the single-precision result.
+* Functions designed to return complex values, whether single-precision or
+  double-precision, are converted to subroutines in the Apple/F2C convention, 
+  with a pointer to the return value serving as the first argument. (Note that
+  this differs from the CBLAS convention of passing a pointer to the
+  return value as the *final* argument.) GNU Fortran, on the other hand,
+  expects these values to be returned as a C-style return value.
+
+For programs that use single-precision or complex arithmetic, then, these
+incompatibilities *must* be addressed or incorrect results and crashes can
+occur. In some projects, these errors go uncorrected, because the use cases
+that exercise them are uncommon.
+
+One solution is to force GNU Fortran to adopt the older, F2C-style return
+value convention, using the ``-ff2c`` flag. If that solution is sufficient
+for you, then I encourage you to adopt it. Unfortunately, this may not be
+possible if there is other code or other libraries that you rely upon that
+assume the default GNU Fortran convention. And don't forget to rewrite your
+C code according to the F2C return value conventions.
+
+The approach taken by vecLibFort is to provide a thin translation layer
+between the F2C and GFortran worlds, for the few functions where there is a
+difference. For BLAS, this is simply a matter of wrapping Apple's CBLAS
+calls in a FORTRAN-friendly wrapper. For LAPACK, a bit of dlopen/dlsym
+trickery is required to avoid name conflicts.
+
+Still another option is to use a different BLAS and LAPACK library, such
+as [MKL][] or [OpenBlas][]. I am sure there are good arguments to be made
+for all three options.
+
+### Using vecLibFort
+
+This code can be used in one of three ways, and the included ``Makefile``
+builds all three for you. The only variable you may want to modify is the
+``PREFIX`` variable, which determines the install location ``$(PREFIX)/lib``.
+
+#### Dynamic library: ``libvecLibFort.dylib``
+
+The most straightforward way to use ``vecLibFort`` is by linking with the 
+standard dynamic library using ``-lvecLibFort``. Of course, if you installed
+the library in a non-standard location, you will need an ``-L<path>`` linker
+flag as well.
+
+If you use this approach, you do *not* need to add ``-framework vecLib`` or
+``-framework Accelerate`` as well. That is because vecLibFort is built to 
+re-export all of Accelerate's symbols, even those it does not "fix". Thus it 
+serves as a *full replacement* for vecLib/Accelerate.
+
+#### Static library / direct inclusion: ``libvecLibFort.a``
+
+For new projects, feel free to add ``vecLibFort.c``, ``static.h``, and
+``cloak.h`` to your project, or link with the static library. You will also
+need to link ``-framework vecLib`` or ``-framework Accelerate``.
+
+<a name="preloaded"></a>
+#### Preloaded (interposing) library: ``libvecLibFortI.dylib``
+
+Suppose you have a program that is already compiled, but which apparently 
+exhibits the errors discussed herein. Or perhaps you are using a precompiled
+third-party library that has not implemented measures like these itself; but
+because it has already been linked to vecLib, the bugs are baked in. (If you 
+can alter the linking information of a dynamic library, I bow to your skill.)
+
+In these cases, there is a *preload* feature of Mac OSX's ``dyld`` system that
+can come in quite handy. The OS makes it possible to specify a library to be
+*preloaded* before the application, with a list of instructions to replace
+functions with alternate versions, a process known as *interposing*. The
+source file ``vecLibFort.c`` includes this interposing code, but it is
+wrapped with ``#ifdef VECLIBFORT_INTERPOSE`` to avoid clashing with the 
+non-interposing code.
+
+To use this library, you must add the full path to ``libvecLibFortI.dylib``
+to the [``DYLD_INSERT_LIBRARIES`` environment variable][DYLD]. For instance,
+if it has been installed in the default location, the command
+
+    DYLD_INSERT_LIBRARIES=/usr/local/lib/libvecLibFortI.dylib program
+
+will run the program ``program`` but with the BLAS and LAPACK calls corrected.
+
+Of course, this may not work---it may be that the bugs you are seeing are not
+in fact caused by the specific issues addressed by vecLibFort. Or I might not
+have implemented something correctly. (Bug reports are welcome.) And you
+should *not* use this if the program or library *already* uses the F2C 
+calling conventions correctly; you *will* break it.
+
+### Inspirations
+
+This code in ``vecLibFort.c`` is new, but the concepts that undergird it are 
+most certainly not. The inspirations include:
+
+* The [dotwrp project][dotwrp] provides a simple FORTRAN-based wrapper
+  for the 5 most common problematic BLAS functions. Thanks to vecLib's CBLAS 
+  interface, the substitutions can be made statically. We have extended this 
+  approach to cover all of the relevant BLAS calls, and implemented it in C.
+* The dynamic substitution approach is heavily inspired by the method used by
+  [GNU Octave](https://www.gnu.org/software/octave/), as contributed by Jarno
+  Rajahaime. You can see the code [here][blaswrap]. vecLibFort differs from Octave
+  in that it resolves the replacements lazily, eliminating the need for
+  lookup tables and (hopefully) improving performance. It also implements the 
+  full set of BLAS/LAPACK replacements, whereas Octave replaces only a subset.
+* The interposing implementation is explained in a variety of places on the 
+  Internet, including section 2.6.3.4 of Amit Singh's book "Mac OSX
+  Internals." (http://osxbook.com). Point your favorite search engine to the
+  term [``DYLD_INSERT_LIBRARIES``][Google] to find a wealth of material.
+* In order to make the primary source file as compact as possible, this code
+  employs a simple preprocessor library by Paul Fultz II called [Cloak][]. The
+  [Boost Preprocessor Library][Boost] is perhaps a more well known example
+  of this kind of work, but it is far more complex than needed in this case.
+
+### License
+
+##### English
+
+I've released this under the [Boost Software License](http://www.boost.org/users/license.html). So do whatever
+you wish with it. You do not have to redistribute the source code; but if you
+do, you must include the license with it.
+
+If you do use this in your projects, I would appreciate it if you would give 
+me credit, as I have attempted to do in the previous section. But I'm not 
+going to get bent out of shape about it. Large piles of cash are welcome, as 
+are simple emails of gratitude, or pull requests!
+
+##### Legalese
+
+> Boost Software License - Version 1.0 - August 17th, 2003
+> 
+> Permission is hereby granted, free of charge, to any person or organization
+> obtaining a copy of the software and accompanying documentation covered by
+> this license (the "Software") to use, reproduce, display, distribute,
+> execute, and transmit the Software, and to prepare derivative works of the
+> Software, and to permit third-parties to whom the Software is furnished to
+> do so, all subject to the following:
+> 
+> The copyright notices in the Software and this entire statement, including
+> the above license grant, this restriction and the following disclaimer,
+> must be included in all copies of the Software, in whole or in part, and
+> all derivative works of the Software, unless such copies or derivative
+> works are solely in the form of machine-executable object code generated by
+> a source language processor.
+> 
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+> SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+> FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+> ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+> DEALINGS IN THE SOFTWARE.
+
+[vecLib]:https://developer.apple.com/library/mac/documentation/Performance/Conceptual/vecLib/Reference/reference.html
+[GNU Fortran]:http://gcc.gnu.org/fortran/
+[gnufarg]:http://gcc.gnu.org/onlinedocs/gfortran/Argument-passing-conventions.html
+[F2C]:http://www.netlib.org/f2c/
+[DYLD]:https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man1/dyld.1.html
+[dotwrp]:https://github.com/tenomoto/dotwrp
+[GNU Octave]:https://www.gnu.org/software/octave/
+[blaswrap]:http://hg.savannah.gnu.org/hgweb/octave/file/tip/liboctave/cruft/misc/blaswrap.c
+[Google]:https://www.google.com/search?q=DYLD_INSERT_LIBRARIES
+[Cloak]:https://github.com/pfultz2/Cloak/blob/master/cloak.h
+[Boost]:http://www.boost.org/doc/libs/1_55_0/libs/preprocessor/doc/index.html 
+[OpenBLAS]:http://www.openblas.net/
+[MKL]:http://software.intel.com/en-us/intel-mkl
+[blasbug]:http://www.macresearch.org/lapackblas-fortran-106
+[boost]:http://www.boost.org/users/license.html
+
diff --git a/tools/vecLibFort/cloak.h b/tools/vecLibFort/cloak.h
new file mode 100644
index 00000000000..675b21de802
--- /dev/null
+++ b/tools/vecLibFort/cloak.h
@@ -0,0 +1,113 @@
+/*
+
+  Cloak
+  https://github.com/pfultz2/Cloak
+  A mini preprocessor library
+  Copyright (c) 2012-2014 Paul Fultz II  
+  
+  Use, modification and distribution is subject to the Boost Software 
+  License, Version 1.0. See the accompanying file LICENSE or
+
+      http://www.boost.org/LICENSE_1_0.txt
+
+  Permission granted by the author to include this file in vecLibFort
+  under the terms of this license.
+
+*/
+
+#define CAT(a, ...) PRIMITIVE_CAT(a, __VA_ARGS__)
+#define PRIMITIVE_CAT(a, ...) a ## __VA_ARGS__
+
+#define INC(x) PRIMITIVE_CAT(INC_, x)
+#define INC_0 1
+#define INC_1 2
+#define INC_2 3
+#define INC_3 4
+#define INC_4 5
+#define INC_5 6
+#define INC_6 7
+#define INC_7 8
+#define INC_8 9
+#define INC_9 10
+#define INC_10 11
+#define INC_11 12
+
+#define DEC(x) PRIMITIVE_CAT(DEC_, x)
+#define DEC_0 0
+#define DEC_1 0
+#define DEC_2 1
+#define DEC_3 2
+#define DEC_4 3
+#define DEC_5 4
+#define DEC_6 5
+#define DEC_7 6
+#define DEC_8 7
+#define DEC_9 8
+#define DEC_10 9
+#define DEC_11 10  
+
+#define EXPR_S(s) PRIMITIVE_CAT(EXPR_, s)
+#define EXPR_0(...) __VA_ARGS__
+#define EXPR_1(...) __VA_ARGS__
+#define EXPR_2(...) __VA_ARGS__
+#define EXPR_3(...) __VA_ARGS__
+#define EXPR_4(...) __VA_ARGS__
+#define EXPR_5(...) __VA_ARGS__
+#define EXPR_6(...) __VA_ARGS__
+#define EXPR_7(...) __VA_ARGS__
+#define EXPR_8(...) __VA_ARGS__
+#define EXPR_9(...) __VA_ARGS__
+#define EXPR_10(...) __VA_ARGS__
+#define EXPR_11(...) __VA_ARGS__ 
+#define EXPR_12(...) __VA_ARGS__
+  
+#define CHECK_N(x, n, ...) n
+#define CHECK(...) CHECK_N(__VA_ARGS__, 0,)
+
+#define NOT(x) CHECK(PRIMITIVE_CAT(NOT_, x))
+#define NOT_0 ~, 1,
+
+#define COMPL(b) PRIMITIVE_CAT(COMPL_, b)
+#define COMPL_0 1
+#define COMPL_1 0
+
+#define BOOL(x) COMPL(NOT(x))
+
+#define IIF(c) PRIMITIVE_CAT(IIF_, c)
+#define IIF_0(t, ...) __VA_ARGS__
+#define IIF_1(t, ...) t
+
+#define IF(c) IIF(BOOL(c))
+
+#define EAT(...)
+#define EXPAND(...) __VA_ARGS__
+#define WHEN(c) IF(c)(EXPAND, EAT)
+
+#define EMPTY()
+#define DEFER(id) id EMPTY()
+#define OBSTRUCT(id) id DEFER(EMPTY)()
+     
+//#define REPEAT_S(s, n, m, ...) \
+//        IF(n)(REPEAT_I, EAT)(OBSTRUCT(), INC(s), DEC(n), m, __VA_ARGS__)
+//        
+//#define REPEAT_INDIRECT() REPEAT_S
+//#define REPEAT_I(_, s, n, m, ...) \
+//        EXPR_S _(s)( \
+//            REPEAT_INDIRECT _()(s, n, m, __VA_ARGS__) \
+//        )\
+//        m _(s, n, __VA_ARGS__)
+        
+#define REPEAT_S(s, n, m, ...) \
+        REPEAT_I(OBSTRUCT(), INC(s), n, m, __VA_ARGS__)
+        
+#define REPEAT_INDIRECT() REPEAT_I
+#define REPEAT_I(_, s, n, m, ...) \
+        WHEN _(n)(EXPR_S _(s)( \
+            REPEAT_INDIRECT _()(OBSTRUCT _(), INC _(s), DEC _(n), m, __VA_ARGS__) \
+        ))\
+        m _(s, n, __VA_ARGS__)
+
+#define COMMA() ,
+
+#define COMMA_IF(n) IF(n)(COMMA, EAT)()
+
diff --git a/tools/vecLibFort/static.h b/tools/vecLibFort/static.h
new file mode 100644
index 00000000000..5cbbe5fae96
--- /dev/null
+++ b/tools/vecLibFort/static.h
@@ -0,0 +1,119 @@
+/*
+
+  vecLibFort
+  https://github.com/mcg1969/vecLibFort
+  Run-time F2C/GFORTRAN translation for Apple's vecLib BLAS/LAPACK
+  Copyright (c) 2014 Michael C. Grant
+
+  See README.md for full background and usage details.
+
+  Use, modification and distribution is subject to the Boost Software 
+  License, Version 1.0. See the accompanying file LICENSE or
+
+      http://www.boost.org/LICENSE_1_0.txt
+
+*/
+
+#if defined(ADD_UNDERSCORE)
+#define FNAME(x) x ## _
+#define STATIC 
+#elif defined(ADD_PREFIX)
+#define FNAME(x) my_ ## x
+#define STATIC static
+#else
+#define FNAME(x) x
+#define STATIC 
+#endif
+
+STATIC float FNAME(sdsdot)( const int* N, const float* alpha, const float* X, const int* incX, const float* Y, const int* incY )
+{ /* By-reference sdsdot entry point: dereferences N, alpha and both increments, then forwards to cblas_sdsdot and returns its float result. */
+  DEBUG_S( "sdsdot" )
+  return cblas_sdsdot( *N, *alpha, X, *incX, Y, *incY );
+}
+
+STATIC float FNAME(sdot)( const int* N, const float* X, const int* incX, const float* Y, const int* incY )
+{ /* By-reference sdot entry point: dereferences N and both increments, then forwards to cblas_sdot and returns its float result. */
+  DEBUG_S( "sdot" )
+  return cblas_sdot( *N, X, *incX, Y, *incY );
+}
+
+STATIC float FNAME(snrm2)( const int* N, const float* X, const int* incX )
+{ /* By-reference snrm2 entry point: dereferences N and incX, then forwards to cblas_snrm2 and returns its float result. */
+  DEBUG_S( "snrm2" )
+  return cblas_snrm2( *N, X, *incX );
+}
+
+STATIC float FNAME(sasum)( const int* N, const float *X, const int* incX )
+{ /* By-reference sasum entry point: dereferences N and incX, then forwards to cblas_sasum and returns its float result. */
+  DEBUG_S( "sasum" )
+  return cblas_sasum( *N, X, *incX );
+}
+
+STATIC c_float FNAME(cdotu)( const int* N, const void* X, const int* incX, const void* Y, const int* incY )
+{ /* By-value cdotu: cblas_cdotu_sub hands the product back through a pointer, so it is captured in a local and returned. */
+  DEBUG_S( "cdotu" )
+  c_float dotu;
+  cblas_cdotu_sub( *N, X, *incX, Y, *incY, &dotu );
+  return dotu;
+}
+
+STATIC c_float FNAME(cdotc)( const int* N, const void* X, const int* incX, const void* Y, const int* incY )
+{ /* By-value cdotc: cblas_cdotc_sub hands the product back through a pointer, so it is captured in a local and returned. */
+  DEBUG_S( "cdotc" )
+  c_float dotc;
+  cblas_cdotc_sub( *N, X, *incX, Y, *incY, &dotc );
+  return dotc;
+}
+
+STATIC float FNAME(scnrm2)( const int* N, const void* X, const int* incX )
+{ /* By-reference scnrm2 entry point: dereferences N and incX, then forwards to cblas_scnrm2 and returns its float result. */
+  DEBUG_S( "scnrm2" )
+  return cblas_scnrm2( *N, X, *incX );
+}
+
+STATIC float FNAME(scasum)( const int* N, const void *X, const int* incX )
+{ /* By-reference scasum entry point: dereferences N and incX, then forwards to cblas_scasum and returns its float result. */
+  DEBUG_S( "scasum" )
+  return cblas_scasum( *N, X, *incX );
+}
+
+STATIC c_double FNAME(zdotu)( const int* N, const void* X, const int* incX, const void* Y, const int* incY )
+{ /* By-value zdotu: cblas_zdotu_sub hands the product back through a pointer, so it is captured in a local and returned. */
+  DEBUG_S( "zdotu" )
+  c_double dotu;
+  cblas_zdotu_sub( *N, X, *incX, Y, *incY, &dotu );
+  return dotu;
+}
+
+STATIC c_double FNAME(zdotc)( const int* N, const void* X, const int* incX, const void* Y, const int* incY )
+{ /* By-value zdotc: cblas_zdotc_sub hands the product back through a pointer, so it is captured in a local and returned. */
+  DEBUG_S( "zdotc" )
+  c_double dotc;
+  cblas_zdotc_sub( *N, X, *incX, Y, *incY, &dotc );
+  return dotc;
+}
+
+#ifdef VECLIBFORT_SGEMV
+STATIC void FNAME(sgemv)( const char* trans, const int* m, const int* n,
+  const float* alpha, const float* A, const int* ldA,
+  const float* X, const int* incX,
+  const float* beta, float* Y, const int* incY )
+{ /* By-reference sgemv: the transposed case goes to cblas_sgemv only when X, A and Y are all 32-byte aligned, otherwise to cblas_sgemm. */
+  DEBUG_S( "sgemv" )
+  enum CBLAS_TRANSPOSE T;
+  switch ( trans[0] ) {
+    case 'T': case 't': case 'C': case 'c': /* for real data 'C' means the same as 'T'; it must not fall into the no-transpose default */
+      if ( ((intptr_t)X|(intptr_t)A|(intptr_t)Y)%32 == 0 ) { T = CblasTrans; break; }
+      /* Implement as alpha * X^T * A + beta * Y^T */
+      cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, 1, *n, *m, *alpha, X, *incX, A, *ldA, *beta, Y, *incY );
+      return;
+    default: /* 'N'/'n' and any unrecognised value */
+      T = CblasNoTrans;
+  }
+  cblas_sgemv( CblasColMajor, T, *m, *n, *alpha, A, *ldA, X, *incX, *beta, Y, *incY );
+}
+#endif
+
+#undef FNAME
+#undef STATIC
+
diff --git a/tools/vecLibFort/tester.f90 b/tools/vecLibFort/tester.f90
new file mode 100644
index 00000000000..adf2ca7fd7a
--- /dev/null
+++ b/tools/vecLibFort/tester.f90
@@ -0,0 +1,39 @@
+real,           dimension(2,6) :: a
+complex,        dimension(2,4) :: b
+double complex, dimension(2,3) :: c
+
+real sdot, sdsdot, snrm2, sasum, scnrm2, scasum, slamch
+real slange, clange, slansy, clansy
+complex cdotu, cdotc
+double complex zdotu, zdotc
+
+a = transpose(reshape([1,3,2,4,3,5, 6,4,5,3,4,2],[6,2]))
+b = transpose(reshape([(1,2),(3,4),(5,6),(7,8), (8,1),(7,2),(6,3),(5,4)],[4,2]))
+c = transpose(reshape([(3,2),(2,4),(1,6), (4,6),(5,4),(6,2)],[3,2]))
+
+write(*,*) 'If the return value interface is fixed, none of these values will' ! banner: how to read the results printed below
+write(*,*) 'be zero, nor will they be nonsensically large or small. On the'
+write(*,*) 'other hand, if the translation is incorrect, it is more likely'
+write(*,*) 'that this program will crash.'
+write(*,*) ' '
+
+write(*,*) sdot(6,a(1,:),1,a(2,:),1), sdsdot(6,2.0,a(1,:),1,a(2,:),1), &
+	snrm2(6,a(1,:),1), sasum(6,a(2,:),1)
+write(*,*) cdotu(4,b(1,:),1,b(2,:),1), cdotc(4,b(1,:),1,b(2,:),1)
+write(*,*) scnrm2(4,b(1,:),1), scasum(4,b(2,:),1)
+write(*,*) zdotu(3,c(1,:),1,c(2,:),1)
+write(*,*) zdotc(3,c(1,:),1,c(2,:),1)
+
+write(*,*) slange('F',2,6,a,2,a),clange('F',2,4,b,2,b), &
+	slansy('F','L',2,a,2,a),clansy('F','L',2,a,2,a)
+
+write(*,*) ' '
+write(*,*) 'These are the machine constants generated by SLAMCH. We expect'
+write(*,*) 'some of them to be small (E-08,E-38).'
+write(*,*) ' '
+
+write(*,*) slamch('E'),slamch('S'),slamch('B')
+write(*,*) slamch('P'),slamch('R'),slamch('M')
+write(*,*) slamch('U'),slamch('L'),slamch('O')
+
+end
diff --git a/tools/vecLibFort/vecLib-760.100.h b/tools/vecLibFort/vecLib-760.100.h
new file mode 100644
index 00000000000..cb0596444f4
--- /dev/null
+++ b/tools/vecLibFort/vecLib-760.100.h
@@ -0,0 +1,68 @@
+/*
+ * Modeled from Apple's vecLib-760.10 instance of vecLib.h:
+ * /Library/Developer/CommandLineTools/SDKs/MacOSX11.3.sdk/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers/vecLib.h
+ */
+
+#ifndef __VECLIB__
+#define __VECLIB__
+
+#ifndef __VECLIBTYPES__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/vecLibTypes.h>
+#endif
+
+#ifndef __VBASICOPS__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/vBasicOps.h>
+#endif
+
+#ifndef __VBIGNUM__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/vBigNum.h>
+#endif
+
+#ifndef __VECTOROPS__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/vectorOps.h>
+#endif
+
+#ifndef __VFP__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/vfp.h>
+#endif
+
+#ifndef __VDSP__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/vDSP.h>
+#endif
+
+#if defined __ppc__ || defined __i386__
+#ifndef __VDSP_TRANSLATE__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/vDSP_translate.h>
+#endif
+#endif
+
+#ifndef CBLAS_H	
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/cblas.h>
+#endif
+
+#ifndef __CLAPACK_H
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/clapack.h>
+#endif
+
+#ifndef __LINEAR_ALGEBRA_PUBLIC_HEADER__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/LinearAlgebra/LinearAlgebra.h>
+#endif
+
+#ifndef __SPARSE_HEADER__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/Sparse/Sparse.h>
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/Sparse/Solve.h>
+#endif
+
+#ifndef __QUADRATURE_PUBLIC_HEADER__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/Quadrature/Quadrature.h>
+#endif // __QUADRATURE_PUBLIC_HEADER__
+
+#ifndef __BNNS_HEADER__
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/BNNS/bnns.h>
+#endif // __BNNS_HEADER__
+
+#ifndef __VFORCE_H
+#include <Accelerate/../Frameworks/vecLib.framework/Headers/vForce.h>
+#endif
+
+#endif /* __VECLIB__ */
diff --git a/tools/vecLibFort/vecLibFort.c b/tools/vecLibFort/vecLibFort.c
new file mode 100644
index 00000000000..f5ec7c30011
--- /dev/null
+++ b/tools/vecLibFort/vecLibFort.c
@@ -0,0 +1,301 @@
+/*
+
+  vecLibFort
+  https://github.com/mcg1969/vecLibFort
+  Run-time F2C/GFORTRAN translation for Apple's vecLib BLAS/LAPACK
+  Copyright (c) 2014 Michael C. Grant
+
+  See README.md for full background and usage details.
+
+  Use, modification and distribution is subject to the Boost Software 
+  License, Version 1.0. See the accompanying file LICENSE or
+
+      http://www.boost.org/LICENSE_1_0.txt
+
+*/
+
+#include <stdio.h>
+#include "cloak.h"
+/* Don't load the CLAPACK header, because we are using a different calling
+   convention for the replaced functions than the ones listed there. */
+#define __CLAPACK_H
+#include "vecLib-760.100.h"
+#include <Accelerate/Accelerate.h>
+#include <AvailabilityMacros.h>
+
+/* Add a SGEMV fix for Mavericks. See
+  http://www.openradar.me/radar?id=5864367807528960 */
+
+#if !defined(VECLIBFORT_SGEMV) && \
+    defined(MAC_OS_X_VERSION_10_9) && \
+    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 && \
+    !(defined(MAC_OS_X_VERSION_10_10) && \
+      MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10)
+#define VECLIBFORT_SGEMV
+#endif
+
+#define VOIDS_(s,i,id) COMMA_IF(i) void*
+#define VOIDS(n) IF(n)(EXPR_S(0)(REPEAT_S(0,DEC(n),VOIDS_,~)),void)
+#define VOIDA_(s,i,id) COMMA_IF(i) void *a ## i
+#define VOIDA(n) IF(n)(EXPR_S(0)(REPEAT_S(0,DEC(n),VOIDA_,~)),void)
+#define PARAM_(s,i,id) COMMA_IF(i)a ## i
+#define PARAM(n) IF(n)(EXPR_S(0)(REPEAT_S(0,DEC(n),PARAM_,~)),)
+
+#ifdef VECLIBFORT_VERBOSE
+#define DEBUG(...) fprintf(stderr,__VA_ARGS__);
+static const char* dynamic_msg = "Entering dynamic %s replacement\n";
+static const char* static_msg = "Entering static %s replacement\n";
+#define DEBUG_S(x) DEBUG( static_msg, x )
+#define DEBUG_D(x) DEBUG( dynamic_msg, x )
+
+#else
+#define DEBUG(...)
+#define DEBUG_S(x)
+#define DEBUG_D(x)
+#endif
+
+#include <complex.h>
+typedef float complex c_float;
+typedef double complex c_double;
+
+#ifdef VECLIBFORT_INTERPOSE
+
+/*
+ * INTERPOSING MODE
+ *
+ * In this mode, dyld is instructed to preload this library even before the
+ * executable itself. It reads the __DATA.__interpose section of the library
+ * for the interpose information, which it uses to swap out the offending
+ * BLAS/LAPACK functions with our replacements. Because vecLib provides two
+ * aliases for each function---one with a trailing underscore, and one
+ * without---we need two interpose records for each replacement.
+ *
+ * For instance, for "sdot", we define a static function
+ *    static float my_sdot( const int* N, const float* X, const int* incX, const float* Y, const int* incY )
+ * and add interpose data to signify two substitutions:
+ *    sdot_ -> my_sdot
+ *    sdot  -> my_sdot
+ */
+
+typedef struct interpose_t_ {
+  const void *replacement;
+  const void *original;
+} interpose_t;
+
+#define INTERPOSE(name) \
+__attribute__((used)) interpose_t interpose_ ## name [] \
+__attribute__((section ("__DATA,__interpose"))) = \
+{ { (const void*)&my_ ## name, (const void*)&name }, \
+  { (const void*)&my_ ## name, (const void*)&name ## _ } };
+
+#define D2F_CALL(name,n) \
+extern double name( VOIDS(n) ); \
+extern double name ## _( VOIDS(n) ); \
+static float my_ ## name ( VOIDA(n) ) \
+{ return (float)name ## _( PARAM(n) ); } \
+INTERPOSE(name)
+
+#define CPLX_CALL(type,name,n) \
+extern void name( VOIDS(INC(n)) ); \
+extern void name ## _( VOIDS(INC(n)) ); \
+static c_ ## type my_ ## name ( VOIDA(n) ) \
+{ \
+  c_ ## type cplx; \
+  name ## _( &cplx, PARAM(n) ); \
+  return cplx; \
+} \
+INTERPOSE(name)
+
+/*
+ * DYNAMIC BLAS SUBSTITUTION
+ *
+ * For the interpose library we need to use the same techniques for the BLAS
+ * as we do for the LAPACK routines. However, because we have CBLAS versions
+ * available to use, we can use the wrappers already created in "static.h"
+ * by prepending them with the "my_" prefixes.
+ */
+
+#define BLS_CALL(type,name,n) \
+extern type name( VOIDS(n) ); \
+extern type name ## _( VOIDS(n) ); \
+INTERPOSE(name)
+  
+#define ADD_PREFIX
+#include "static.h"
+#undef ADD_PREFIX
+
+BLS_CALL(float,sdsdot,6)
+BLS_CALL(float,sdot,5)
+BLS_CALL(float,snrm2,3)
+BLS_CALL(float,sasum,3)
+BLS_CALL(c_float,cdotu,5)
+BLS_CALL(c_float,cdotc,5)
+BLS_CALL(float,scnrm2,3)
+BLS_CALL(float,scasum,3)
+BLS_CALL(c_double,zdotu,5)
+BLS_CALL(c_double,zdotc,5)
+#if defined(VECLIBFORT_SGEMV)
+BLS_CALL(void,sgemv,11)
+#endif
+
+#else
+
+/*
+ * STATIC BLAS SUBSTITUTION
+ * 
+ * For BLAS functions, we have access to CBLAS versions of each function.
+ * So the hoops we need to jump through to resolve the name clashes in the
+ * dynamic substitution mode can be avoided. Instead, we simply create the
+ * replacement functions to call the CBLAS counterparts instead.
+ *
+ * To avoid duplicating code, we include the functions in "static.h" twice:
+ * once for the functions with trailing underscores (e.g., "sdot_"), and once 
+ * without (e.g., "sdot"). In theory, we could create just one replacement
+ * with two aliases, but clang has thus far been uncooperative. Any assistance 
+ * on this matter would be appreciated.
+ */
+
+#include "static.h"
+#define ADD_UNDERSCORE
+#include "static.h"
+
+/*
+ * DYNAMIC LAPACK SUBSTITUTION
+ * 
+ * In this mode, we give our functions identical names, and rely on link
+ * order to ensure that these take precedence over those declared in vecLib.
+ * Thus whenever the main code attempts to call one of the covered functions,
+ * it will be directed to one of our wrappers instead.
+ *
+ * Because vecLib provides two aliases for each function---one with a
+ * trailing underscore, and one without---we actually need two separate
+ * replacement functions (at least until we can figure out how to do aliases
+ * cleanly in clang.) Each pair of replacements controls a single static
+ * pointer to the replacement function. On the first invocation of either,
+ * this pointer is retrieved using a dlsym() command.
+ *
+ * For instance, for "sdot", we define two functions
+ *    float sdot_( const int* N, const float* X, const int* incX )
+ *    float sdot ( const int* N, const float* X, const int* incX )
+ * On the first invocation of either, the "sdot_" symbol from vecLib is
+ * retrieved using the dlsym() command and stored in
+ *    static void* fp_sdot;
+ * In theory, we could create just one replacement with two aliases, but 
+ * clang has thus far been uncooperative. Any assistance on this matter would
+ * be appreciated. 
+ */
+
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define VECLIB_FILE "/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/vecLib"
+
+static void * veclib = 0;
+
+static void unloadlib(void) /* exit handler (registered via atexit in loadlib): closes the vecLib handle */
+{
+  DEBUG( "Unloading vecLib\n" );
+  dlclose (veclib);
+}
+
+static void loadlib(void) /* dlopen()s VECLIB_FILE into veclib; aborts on failure, otherwise registers unloadlib to run at exit */
+{
+  static const char* veclib_loc = VECLIB_FILE;
+  DEBUG( "Loading library: %s\n", veclib_loc )
+  veclib = dlopen (veclib_loc, RTLD_LOCAL | RTLD_FIRST);
+  if ( veclib == 0 ) { /* print the path, not the handle: the handle is NULL here and NULL for %s is undefined */
+    fprintf( stderr, "Failed to open vecLib library; aborting.\n   Location: %s\n", veclib_loc );
+    abort ();
+  }
+  atexit(unloadlib);
+}
+
+static void* loadsym( const char* nm ) /* looks up symbol nm in vecLib via dlsym; aborts instead of returning NULL */
+{
+  if ( veclib == 0 ) loadlib(); /* open the library lazily on the first lookup */
+  DEBUG( "Loading function: %s\n", nm )
+  void *ans = dlsym( veclib, nm );
+  if ( ans != 0 ) return ans;
+  fprintf( stderr, "vecLib symbol '%s' could not be resolved; aborting.\n", nm );
+  abort(); /* an unresolved symbol is treated as fatal */
+}
+
+#define D2F_CALL_(fname,name,n) \
+float fname( VOIDA(n) ) \
+{ \
+  DEBUG_D( #name "_" ) \
+  if ( !fp_ ## name ) fp_ ## name = loadsym( #name "_" ); \
+  return ((ft_ ## name)fp_ ## name)( PARAM(n) ); \
+}
+
+#define D2F_CALL(name,n) \
+typedef double (*ft_ ## name)( VOIDS(n) ); \
+static void *fp_ ## name = 0; \
+D2F_CALL_(name,name,n) \
+D2F_CALL_(name ## _,name,n)
+
+#define CPLX_CALL_(type,fname,name,n) \
+c_ ## type fname( VOIDA(n) ) \
+{ \
+  c_ ## type cplx; \
+  DEBUG_D( #name "_" ) \
+  if ( !fp_ ## name ) fp_ ## name = loadsym( #name "_" ); \
+  ((ft_ ## name)fp_ ## name)( &cplx, PARAM(n) ); \
+  return cplx; \
+}
+
+#define CPLX_CALL(type,name,n) \
+typedef void (*ft_ ## name)( VOIDS(INC(n)) ); \
+static void *fp_ ## name = 0; \
+CPLX_CALL_(type,name,name,n) \
+CPLX_CALL_(type,name ## _,name,n)
+
+#endif
+
+D2F_CALL(clangb,7)
+D2F_CALL(clange,6)
+D2F_CALL(clangt,5)
+D2F_CALL(clanhb,7)
+D2F_CALL(clanhe,6)
+D2F_CALL(clanhp,5)
+D2F_CALL(clanhs,5)
+D2F_CALL(clanht,4)
+D2F_CALL(clansb,7)
+D2F_CALL(clansp,5)
+D2F_CALL(clansy,6)
+D2F_CALL(clantb,8)
+D2F_CALL(clantp,6)
+D2F_CALL(clantr,8)
+
+D2F_CALL(scsum1,3)
+#if defined(MAC_OS_X_VERSION_10_6) && \
+    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+D2F_CALL(slaneg,6)
+#endif
+D2F_CALL(slangb,7)
+D2F_CALL(slange,6)
+D2F_CALL(slangt,5)
+D2F_CALL(slanhs,5)
+D2F_CALL(slansb,7)
+D2F_CALL(slansp,5)
+D2F_CALL(slanst,4)
+D2F_CALL(slansy,6)
+D2F_CALL(slantb,8)
+D2F_CALL(slantp,6)
+D2F_CALL(slantr,8)
+D2F_CALL(slapy2,2)
+D2F_CALL(slapy3,3)
+D2F_CALL(slamch,1)
+D2F_CALL(slamc3,2)
+
+#if defined(MAC_OS_X_VERSION_10_7) && \
+    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+D2F_CALL(clanhf,6)
+D2F_CALL(slansf,6)
+#endif
+
+CPLX_CALL(float,cladiv,2)
+CPLX_CALL(double,zladiv,2)
+
+