From 1cb8465b2fd47db462d0daffd0919d200b5d99cb Mon Sep 17 00:00:00 2001
From: Matt Borgerson
Date: Sun, 26 Jan 2025 15:59:00 -0700
Subject: [PATCH] nv2a: Add swizzle test and benchmark

---
 tests/xbox/swizzle/Makefile       |  25 +++
 tests/xbox/swizzle/swizzle-test.c | 249 ++++++++++++++++++++++++++++++
 2 files changed, 274 insertions(+)
 create mode 100644 tests/xbox/swizzle/Makefile
 create mode 100644 tests/xbox/swizzle/swizzle-test.c

diff --git a/tests/xbox/swizzle/Makefile b/tests/xbox/swizzle/Makefile
new file mode 100644
index 00000000000..77bae892946
--- /dev/null
+++ b/tests/xbox/swizzle/Makefile
@@ -0,0 +1,25 @@
+#CC=clang
+CC=gcc
+CFLAGS=-O2 -Wall -g
+
+swizzle-test: swizzle-test.o swizzle-a.o
+	$(CC) -o $@ $^
+
+swizzle-test.o: swizzle-test.c
+
+# Rename the exported symbols so swizzle-test.c can link them as method A.
+swizzle-a.o: swizzle.o
+	objcopy \
+		--redefine-sym swizzle_box=swizzle_box_A \
+		--redefine-sym unswizzle_box=unswizzle_box_A \
+		$< $@
+
+swizzle.o: ../../../hw/xbox/nv2a/pgraph/swizzle.c
+	$(CC) -o $@ $(CFLAGS) -c $<
+
+%.o: %.c
+	$(CC) -o $@ $(CFLAGS) -c $<
+
+.PHONY: clean
+clean:
+	rm -f swizzle-test swizzle-test.o swizzle.o swizzle-a.o
diff --git a/tests/xbox/swizzle/swizzle-test.c b/tests/xbox/swizzle/swizzle-test.c
new file mode 100644
index 00000000000..547aecdf918
--- /dev/null
+++ b/tests/xbox/swizzle/swizzle-test.c
@@ -0,0 +1,249 @@
+/*
+ * Crosscheck and benchmark swizzle.
+ *
+ * Copyright (c) 2025 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+/*
+ * X-macro list of the swizzle implementations under test. Every X(m) entry
+ * expects swizzle_box_m and unswizzle_box_m to be provided at link time.
+ */
+#define X_METHODS \
+    X(A)
+    // X(B)
+
+typedef void (*swizzle_box_handler)(
+    const uint8_t *src_buf,
+    unsigned int width,
+    unsigned int height,
+    unsigned int depth,
+    uint8_t *dst_buf,
+    unsigned int row_pitch,
+    unsigned int slice_pitch,
+    unsigned int bytes_per_pixel);
+
+typedef struct Method {
+    const char *name;
+    swizzle_box_handler swizzle, unswizzle;
+} Method;
+
+#define PROTO(m) \
+    void m( \
+        const uint8_t *src_buf, \
+        unsigned int width, \
+        unsigned int height, \
+        unsigned int depth, \
+        uint8_t *dst_buf, \
+        unsigned int row_pitch, \
+        unsigned int slice_pitch, \
+        unsigned int bytes_per_pixel);
+
+#define X(m) \
+    PROTO(swizzle_box_ ## m) \
+    PROTO(unswizzle_box_ ## m)
+X_METHODS
+#undef X
+
+static const Method methods[] = {
+    #define X(m) { #m, swizzle_box_ ## m, unswizzle_box_ ## m},
+    X_METHODS
+    #undef X
+};
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+static const int widths[] = { 1, 2, 4, 8, 16, 32 };
+static const int heights[] = { 1, 2, 4, 8, 16, 32 };
+static const int depths[] = { 1, 2, 4, 8, 16, 32 };
+static const int bpps[] = { 1, 2, 3, 4 };
+
+/* malloc() wrapper that exits with a message instead of returning NULL. */
+static void *xmalloc(size_t size)
+{
+    void *ptr = malloc(size);
+
+    if (!ptr) {
+        fprintf(stderr, "failed to allocate %zu bytes\n", size);
+        exit(EXIT_FAILURE);
+    }
+    return ptr;
+}
+
+/*
+ * Round-trip random data through every method for a range of box sizes,
+ * pixel sizes and pitch paddings. Method 0 is the reference: its unswizzle
+ * must restore the input, and every other method must match its output.
+ */
+static void crosscheck(void)
+{
+    assert(ARRAY_SIZE(methods) > 0);
+    fprintf(stderr, "%s...", __func__);
+    for (int row_pitch_adjust = 0; row_pitch_adjust < 4; row_pitch_adjust++)
+    for (int slice_pitch_adjust = 0; slice_pitch_adjust < 4; slice_pitch_adjust++)
+    for (size_t depth_idx = 0; depth_idx < ARRAY_SIZE(depths); depth_idx++)
+    for (size_t width_idx = 0; width_idx < ARRAY_SIZE(widths); width_idx++)
+    for (size_t height_idx = 0; height_idx < ARRAY_SIZE(heights); height_idx++)
+    for (size_t bpp_idx = 0; bpp_idx < ARRAY_SIZE(bpps); bpp_idx++) {
+
+        int width = widths[width_idx];
+        int height = heights[height_idx];
+        int depth = depths[depth_idx];
+        int bpp = bpps[bpp_idx];
+
+        /* Slice padding belongs in the pitch so padded slices are exercised. */
+        size_t row_pitch = width * bpp + row_pitch_adjust;
+        size_t slice_pitch = row_pitch * height + slice_pitch_adjust;
+        size_t size_bytes = slice_pitch * depth;
+
+        uint8_t *original_data = xmalloc(size_bytes);
+        for (size_t i = 0; i < size_bytes; i++) {
+            original_data[i] = rand();
+        }
+
+        /*
+         * Destinations start as copies of the input so any bytes a method
+         * leaves untouched (e.g. pitch padding) still compare equal below.
+         */
+        void *swizzled_data_A = xmalloc(size_bytes);
+        memcpy(swizzled_data_A, original_data, size_bytes);
+        methods[0].swizzle(original_data, width, height, depth, swizzled_data_A,
+                           row_pitch, slice_pitch, bpp);
+
+        void *unswizzled_data_A = xmalloc(size_bytes);
+        memcpy(unswizzled_data_A, original_data, size_bytes);
+        methods[0].unswizzle(swizzled_data_A, width, height, depth,
+                             unswizzled_data_A, row_pitch, slice_pitch, bpp);
+        assert(!memcmp(original_data, unswizzled_data_A, size_bytes));
+
+        for (size_t method_idx = 1;
+             method_idx < ARRAY_SIZE(methods);
+             method_idx++) {
+            void *swizzled_data_B = xmalloc(size_bytes);
+            memcpy(swizzled_data_B, original_data, size_bytes);
+            methods[method_idx].swizzle(original_data, width, height, depth,
+                                        swizzled_data_B, row_pitch, slice_pitch,
+                                        bpp);
+            assert(!memcmp(swizzled_data_B, swizzled_data_A, size_bytes));
+
+            void *unswizzled_data_B = xmalloc(size_bytes);
+            memcpy(unswizzled_data_B, original_data, size_bytes);
+            methods[method_idx].unswizzle(swizzled_data_B, width, height, depth,
+                                          unswizzled_data_B, row_pitch,
+                                          slice_pitch, bpp);
+            assert(!memcmp(original_data, unswizzled_data_B, size_bytes));
+
+            free(unswizzled_data_B);
+            free(swizzled_data_B);
+        }
+
+        free(unswizzled_data_A);
+        free(swizzled_data_A);
+        free(original_data);
+
+        // fprintf(stderr, "w:%d, h:%d, d:%d, bpp:%d pitch:%d,%d\n", width, height, depth, bpp, row_pitch_adjust, slice_pitch_adjust);
+    }
+
+    fprintf(stderr, "ok!\n");
+}
+
+#define NUM_ITERATIONS 10
+
+/* qsort() comparator ordering ints ascending without overflowing. */
+static int compare_ints(const void *a, const void *b)
+{
+    int lhs = *(const int *)a;
+    int rhs = *(const int *)b;
+
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Time each method's swizzle on a 256x256x256 box at 4 bytes per pixel and
+ * print min/max/avg/median microseconds plus throughput at the median.
+ */
+static void bench(void)
+{
+    fprintf(stderr, "%s...", __func__);
+
+    int width = 256;
+    int height = 256;
+    int depth = 256;
+    int bpp = 4;
+
+    size_t row_pitch = width * bpp;
+    size_t slice_pitch = row_pitch * height;
+    size_t size_bytes = slice_pitch * depth;
+    size_t size_mib = size_bytes / (1024*1024);
+    fprintf(stderr, "with w: %d, h: %d, d: %d, bpp: %d, "
+                    "size: %zu MiB, iterations: %d\n",
+                    width, height, depth, bpp, size_mib, NUM_ITERATIONS);
+
+    void *original_data = xmalloc(size_bytes);
+    memset(original_data, 0, size_bytes);
+
+    void *swizzled_data = xmalloc(size_bytes);
+    memset(swizzled_data, 0, size_bytes);
+
+    for (size_t method_idx = 0; method_idx < ARRAY_SIZE(methods); method_idx++) {
+        const Method * const method = &methods[method_idx];
+        fprintf(stderr, "[%6s] ", method->name);
+
+        int samples[NUM_ITERATIONS];
+        int sum = 0;
+
+        for (int iter = 0; iter < NUM_ITERATIONS; iter++ ) {
+            struct timespec start, end;
+
+            clock_gettime(CLOCK_MONOTONIC, &start);
+            method->swizzle(original_data, width, height, depth, swizzled_data, row_pitch, slice_pitch, bpp);
+            clock_gettime(CLOCK_MONOTONIC, &end);
+
+            uint64_t start_ns = (uint64_t)start.tv_sec * (uint64_t)1000000000 + start.tv_nsec;
+            uint64_t end_ns = (uint64_t)end.tv_sec * (uint64_t)1000000000 + end.tv_nsec;
+
+            samples[iter] = (end_ns - start_ns) / 1000;
+            sum += samples[iter];
+        }
+
+        qsort(samples, ARRAY_SIZE(samples), sizeof(samples[0]), compare_ints);
+
+        int min = samples[0],
+            max = samples[ARRAY_SIZE(samples) - 1],
+            avg = sum / ARRAY_SIZE(samples),
+            med = samples[ARRAY_SIZE(samples) / 2];
+        fprintf(stderr, "min: %6d us, max: %6d us, avg: %6d us, med: %6d us -- %.2g GiB/s\n",
+                min, max, avg, med, (size_mib / 1024.0) / (med / 1000000.0));
+    }
+
+    free(swizzled_data);
+    free(original_data);
+}
+
+int main(void)
+{
+    srand(1337);
+
+    crosscheck();
+    bench();
+
+    return 0;
+}