facebookresearch · mfschubert · Mar 2, 2024 · Mar 2, 2024 · Mar 2, 2024 · Mar 2, 2024
diff --git a/src/fmmax/pml.py b/src/fmmax/pml.py
@@ -105,7 +105,10 @@ def _crop_and_edge_pad_pml_region(
     """Crops the trailing dimensions of `permittivity` and applies edge padding."""
     i_width, j_width = widths
     if (i_width * 2, j_width * 2) >= permittivity.shape[-2:]:
-        raise ValueError()
+        raise ValueError(
+            f"`widths` {widths} are incompatible with permittivity shape "
+            f"{permittivity.shape}."
+        )
 
     arr_cropped = permittivity[
         ...,

diff --git a/src/fmmax/scattering.py b/src/fmmax/scattering.py
@@ -287,35 +287,36 @@ def _extend_s_matrix(
     #
     # phi_T = jnp.linalg.inv(omega_k @ phi)
     # term1 = diag(q) @ phi_T @ next_omega_k @ next_phi @ diag(1 / next_q)
-    term1 = q[..., :, jnp.newaxis] * jnp.linalg.solve(
+    # term1 = q[..., :, jnp.newaxis] * jnp.linalg.solve(
+    #     omega_k @ phi,
+    #     next_omega_k @ next_phi * (1 / next_q)[..., jnp.newaxis, :],
+    # )
+    term1 = utils.diag(q) @ utils.solve(
         omega_k @ phi,
-        next_omega_k @ next_phi * (1 / next_q)[..., jnp.newaxis, :],
+        next_omega_k @ next_phi @ utils.diag(1 / next_q),
     )
     # term2 = phi_T @ omega_k @ next_phi
-    term2 = jnp.linalg.solve(omega_k @ phi, omega_k @ next_phi)
+    term2 = utils.solve(omega_k @ phi, omega_k @ next_phi)
     i11 = i22 = 0.5 * (term1 + term2)
     i12 = i21 = 0.5 * (-term1 + term2)
 
     # Phase terms \hat{f}(d) defined near equation 4.2 of [1999 Whittaker]. These
     # describe phase accumulated by propagating across a layer for each eigenmode.
-    fd = jnp.exp(1j * q * layer_thickness)
-    fd_next = jnp.exp(1j * next_q * next_layer_thickness)
+    fd = utils.diag(jnp.exp(1j * q * layer_thickness))
+    fd_next = utils.diag(jnp.exp(1j * next_q * next_layer_thickness))
 
     # Update the s-matrix to include the present layer, following the recipe
     # given in equation 5.4 of [1999 Whittaker].
     s11, s12, s21, s22 = s_matrix_blocks
 
     # s11_next = inv(i11 - diag(fd) @ s12 @ i21) @ diag(fd) @ s11
-    term3 = i11 - fd[..., :, jnp.newaxis] * s12 @ i21
-    s11_next = jnp.linalg.solve(term3, fd[..., :, jnp.newaxis] * s11)
+    term3 = i11 - fd @ s12 @ i21
+    s11_next = utils.solve(term3, fd @ s11)
     # s12_next = inv(i11 - diag(fd) @ s12 @ i21) @ (diag(fd) @ s12 @ i22 - i12) @ diag(fd_next)
-    s12_next = jnp.linalg.solve(
-        term3,
-        (fd[..., :, jnp.newaxis] * s12 @ i22 - i12) * fd_next[..., jnp.newaxis, :],
-    )
+    s12_next = utils.solve(term3, (fd @ s12 @ i22 - i12) @ fd_next)
     s21_next = s22 @ i21 @ s11_next + s21
     # s22_next = s22 @ i21 @ s12_next + s22 @ i22 @ diag(fd_next)
-    s22_next = s22 @ i21 @ s12_next + s22 @ i22 * fd_next[..., jnp.newaxis, :]
+    s22_next = s22 @ i21 @ s12_next + s22 @ i22 @ fd_next
 
     return (s11_next, s12_next, s21_next, s22_next)
 

diff --git a/src/fmmax/utils.py b/src/fmmax/utils.py
@@ -20,6 +20,17 @@ def diag(x: jnp.ndarray) -> jnp.ndarray:
     return y.at[..., i, i].set(x)
 
 
+def solve(a: jnp.ndarray, b: jnp.ndarray) -> jnp.ndarray:
+    """Solves `a @ x = b` one matrix at a time, avoiding any batch dependency.
+
+    This is a limited version of `jnp.linalg.solve`: `a` and `b` must have identical
+    shapes whose trailing two dimensions are square. All leading dimensions are
+    treated as batch dimensions, and each matrix pair in the batch is solved with
+    its own unbatched call to `jnp.linalg.solve`.
+    See https://github.com/google/jax/issues/20047
+
+    Args:
+        a: The coefficient matrices, with shape `(..., m, m)`.
+        b: The right-hand sides, with the same shape as `a`.
+
+    Returns:
+        The solutions `x`, with the same shape as `a`.
+
+    Raises:
+        ValueError: If the shapes of `a` and `b` differ, or are not `(..., m, m)`.
+    """
+    # Validate with exceptions rather than `assert`, which is stripped under `-O`.
+    if a.shape != b.shape:
+        raise ValueError(
+            f"`a` and `b` must have matching shapes, but got {a.shape} and {b.shape}."
+        )
+    if a.ndim < 2 or a.shape[-1] != a.shape[-2]:
+        raise ValueError(
+            f"`a` and `b` must have shape `(..., m, m)`, but got {a.shape}."
+        )
+    m = a.shape[-1]
+    # Collapse all batch dimensions so each matrix pair can be solved on its own.
+    a_flat = a.reshape((-1, m, m))
+    b_flat = b.reshape((-1, m, m))
+    results = [jnp.linalg.solve(af, bf) for af, bf in zip(a_flat, b_flat, strict=True)]
+    return jnp.asarray(results).reshape(a.shape)
+
+
 def angular_frequency_for_wavelength(wavelength: jnp.ndarray) -> jnp.ndarray:
     """Returns the angular frequency for the specified wavelength."""
     return 2 * jnp.pi / wavelength  # Since by our convention c == 1.
@@ -189,7 +200,7 @@ def _eig_bwd(
         * (eigenvectors_H @ eigenvectors)
         @ jnp.where(eye_mask, jnp.real(eigenvectors_H @ grad_eigenvectors_conj), 0.0)
     ) @ eigenvectors_H
-    grad_matrix = jnp.linalg.solve(eigenvectors_H, rhs)
+    grad_matrix = solve(eigenvectors_H, rhs)
 
     # Take the conjugate of the gradient, reverting to the jax convention
     # where gradients are with respect to complex parameters.

diff --git a/src/fmmax/vector.py b/src/fmmax/vector.py
@@ -7,7 +7,6 @@
 from typing import Callable, Dict, List, Tuple
 
 import jax
-import jax.example_libraries.optimizers as jopt
 import jax.numpy as jnp
 
 from fmmax import basis, fft, utils

diff --git a/tests/fmmax/test_utils.py b/tests/fmmax/test_utils.py
@@ -10,6 +10,7 @@
 import jax.numpy as jnp
 import numpy as onp
 import parameterized
+import pytest
 
 from fmmax import utils
 
@@ -305,3 +306,40 @@ def test_absolute_axes_out_of_range(self, axes, ndim):
     def test_absolute_axes_match_expected(self, axes, ndim, expected_axes):
         absolute_axes = utils.absolute_axes(axes, ndim)
         self.assertSequenceEqual(absolute_axes, expected_axes)
+
+
+class SolveTest(unittest.TestCase):
+    """Compares unbatched linear solves against solves carrying a batch axis."""
+
+    @staticmethod
+    def _make_systems():
+        """Returns `(a, b, a_batch, b_batch)` for a fixed random 100x100 system.
+
+        The batched operands stack two identical copies of `a` and of `b` along
+        a new leading axis.
+        """
+        lhs, rhs = jax.random.normal(jax.random.PRNGKey(0), (2, 100, 100))
+        assert lhs.shape == rhs.shape == (100, 100)
+        lhs_batch = jnp.stack([lhs, lhs], axis=0)
+        rhs_batch = jnp.stack([rhs, rhs], axis=0)
+        return lhs, rhs, lhs_batch, rhs_batch
+
+    @pytest.mark.xfail
+    def test_jax_linear_solve_batch_dependence(self):
+        # Expected to fail: (for some reason) `jnp.linalg.solve` has a batch
+        # dependency, and the error for batched solves appears to result in
+        # faulty scattering matrices.
+        # See https://github.com/google/jax/issues/20047
+        # Once this test passes, it is probably safe to remove the custom solve
+        # and revert to the standard solve.
+        a, b, a_batch, b_batch = self._make_systems()
+
+        unbatched = jnp.linalg.solve(a, b)
+        batched = jnp.linalg.solve(a_batch, b_batch)
+
+        # The two identical batch entries agree with each other; this passes.
+        onp.testing.assert_array_equal(batched[0, ...], batched[1, ...])
+        # Comparing against the unbatched solution is what fails.
+        for index in (0, 1):
+            onp.testing.assert_array_equal(unbatched, batched[index, ...])
+
+    def test_linear_solve_batch_dependence(self):
+        # Same checks as above, but using the custom solve; all must hold exactly.
+        a, b, a_batch, b_batch = self._make_systems()
+
+        unbatched = utils.solve(a, b)
+        batched = utils.solve(a_batch, b_batch)
+
+        onp.testing.assert_array_equal(batched[0, ...], batched[1, ...])
+        for index in (0, 1):
+            onp.testing.assert_array_equal(unbatched, batched[index, ...])