diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/utils/AppleUtil.java b/frontend/server/src/main/java/org/pytorch/serve/device/utils/AppleUtil.java index 3c32be3317..837045bbfc 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/device/utils/AppleUtil.java +++ b/frontend/server/src/main/java/org/pytorch/serve/device/utils/AppleUtil.java @@ -5,10 +5,9 @@ import com.google.gson.JsonObject; import com.google.gson.JsonParser; import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedHashSet; import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.IntStream; import org.pytorch.serve.device.Accelerator; import org.pytorch.serve.device.AcceleratorVendor; import org.pytorch.serve.device.interfaces.IAcceleratorUtility; @@ -75,15 +74,12 @@ public List extractAccelerators(JsonElement rootObject) { .getAsJsonObject() // Gets the outer object .get("SPDisplaysDataType") // Gets the "SPDisplaysDataType" element .getAsJsonArray(); + JsonObject gpuObject = displaysArray.get(0).getAsJsonObject(); - int number_of_cores = Integer.parseInt(gpuObject.get("sppci_cores").getAsString()); - - // add the object `number_of_cores` times to maintain the exsisitng - // functionality - accelerators = - IntStream.range(0, number_of_cores) - .mapToObj(i -> gpuObject) - .collect(Collectors.toList()); + + // Create list with only a single accelerator object as + // M1, M2, M3 Macs have only a single integrated GPU + accelerators = Collections.singletonList(gpuObject); return accelerators; } diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java index bd7f654ce7..865b232fae 100644 --- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java +++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java @@ -1372,8 +1372,17 @@ public void testMetricManager() throws JsonParseException, 
InterruptedException Assert.assertTrue(++count < 5); } - // 7 system-level metrics + 3 gpu-specific metrics - Assert.assertEquals(metrics.size(), 7 + 3 * configManager.getNumberOfGpu()); + // Determine if the device is Apple or not + String vendor = System.getProperty("os.name"); + if (vendor != null) { + if (vendor.startsWith("Mac")) { + // 7 system-level metrics + 2 gpu-specific metrics (per GPU) for Apple devices + Assert.assertEquals(metrics.size(), 7 + 2 * configManager.getNumberOfGpu()); + } else { + // 7 system-level metrics + 3 gpu-specific metrics (per GPU) for non-Apple devices + Assert.assertEquals(metrics.size(), 7 + 3 * configManager.getNumberOfGpu()); + } + } for (Metric metric : metrics) { String metricName = metric.getMetricName(); diff --git a/frontend/server/src/test/java/org/pytorch/serve/device/utils/AppleUtilTest.java b/frontend/server/src/test/java/org/pytorch/serve/device/utils/AppleUtilTest.java index e333f7ec83..c52e105fc4 100644 --- a/frontend/server/src/test/java/org/pytorch/serve/device/utils/AppleUtilTest.java +++ b/frontend/server/src/test/java/org/pytorch/serve/device/utils/AppleUtilTest.java @@ -76,7 +76,7 @@ public void testExtractAcceleratorId() { public void testExtractAccelerators() { List accelerators = appleUtil.extractAccelerators(sampleOutputJson); - assertEquals(accelerators.size(), 7); + assertEquals(accelerators.size(), 1); assertEquals(accelerators.get(0).get("sppci_model").getAsString(), "Apple M1"); } @@ -88,7 +88,7 @@ public void testSmiOutputToUpdatedAccelerators() { ArrayList updatedAccelerators = appleUtil.smiOutputToUpdatedAccelerators(sampleOutputJson.toString(), parsedGpuIds); - assertEquals(updatedAccelerators.size(), 7); + assertEquals(updatedAccelerators.size(), 1); Accelerator accelerator = updatedAccelerators.get(0); assertEquals(accelerator.getAcceleratorModel(), "Apple M1"); assertEquals(accelerator.getVendor(), AcceleratorVendor.APPLE); @@ -112,7 +112,7 @@ public String[] getUtilizationSmiCommand() { 
ArrayList availableAccelerators = spyAppleUtil.getAvailableAccelerators(availableAcceleratorIds); - assertEquals(availableAccelerators.size(), 7); + assertEquals(availableAccelerators.size(), 1); Accelerator accelerator = availableAccelerators.get(0); assertEquals(accelerator.getAcceleratorModel(), "Apple M1"); assertEquals(accelerator.getVendor(), AcceleratorVendor.APPLE); diff --git a/frontend/server/src/test/java/org/pytorch/serve/util/ConfigManagerTest.java b/frontend/server/src/test/java/org/pytorch/serve/util/ConfigManagerTest.java index 4616b5ae03..4b0b70acc8 100644 --- a/frontend/server/src/test/java/org/pytorch/serve/util/ConfigManagerTest.java +++ b/frontend/server/src/test/java/org/pytorch/serve/util/ConfigManagerTest.java @@ -118,7 +118,8 @@ public void testNumGpuM1() throws ReflectiveOperationException, IOException { String mac_arm64_cpu_only = System.getenv().getOrDefault("TS_MAC_ARM64_CPU_ONLY", "False"); if (arch.equals("aarch64")) { if (mac_arm64_cpu_only.equals("True")) { - Assert.assertEquals(configManager.getNumberOfGpu(), 0); + // Mac M1 returns 1 accelerator device + Assert.assertEquals(configManager.getNumberOfGpu(), 1); } else { Assert.assertTrue(configManager.getNumberOfGpu() > 0); } diff --git a/ts/metrics/system_metrics.py b/ts/metrics/system_metrics.py index 5e69377f5a..6c2becfcda 100644 --- a/ts/metrics/system_metrics.py +++ b/ts/metrics/system_metrics.py @@ -88,6 +88,20 @@ def collect_gpu_metrics(num_of_gpus): amdsmi.amdsmi_shut_down() except amdsmi.AmdSmiException as e: logging.error("Could not shut down AMD-SMI library.") + elif torch.backends.mps.is_available(): + try: + total_memory = torch.mps.driver_allocated_memory() + mem_used = torch.mps.current_allocated_memory() + gpu_mem_utilization = ( + (mem_used / total_memory * 100) if total_memory > 0 else 0 + ) + # Currently there is no way to calculate GPU utilization with MPS. 
+ gpu_utilization = None + except Exception as e: + logging.error(f"Could not capture MPS memory metrics") + mem_used = 0 + gpu_mem_utilization = 0 + gpu_utilization = None dimension_gpu = [ Dimension("Level", "Host"),