Skip to content

Commit

Permalink
fix bugs in notebook, speed testing
Browse files Browse the repository at this point in the history
  • Loading branch information
nvladimus committed Jul 24, 2024
1 parent f1f4f7b commit 18c4ac8
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 49 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## July 2024
:bug: Writing speeds of up to 2300 MB/s reported earlier in [`examples_h5writing.ipynb`](/docs/examples/examples_h5writing.ipynb) were likely an artifact of bugs in the testing code. Speeds of about 200 MB/s are currently more realistic.

## v.1.0.9
:bug: *Bugfix*: `BdvWriter.append_view()` now works correctly when downsampling odd-sized datasets, e.g. (31, 299, 301) (PR #15 by @pr4deepr)

Expand Down
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ API [documentation](https://nvladimus.github.io/npy2bdv/).
See [Recent changes](CHANGELOG.md)

## Writing speed
Writing speeds up to 2300 MB/s can be achieved on a PC with SSD drive.
The speed of writing for long time series (>100 stacks) is typically about 700-900 MB/s.
This is in the range of full-speed camera acquisition
of Hamamatsu Orca Flash4, e.g. 840 MB/s (2048x2048 px at 100 Hz).
Currently, speeds between 150 and 320 MB/s have been achieved with confidence. Speeds of up to 2300 MB/s reported in earlier versions were likely due to bugs in the testing methods in the notebook [`examples_h5writing.ipynb`](/docs/examples/examples_h5writing.ipynb). More testing is underway.

## Acknowledgements
This code was inspired by [Talley Lambert's](https://github.com/tlambert03/imarispy) code
Expand Down
173 changes: 128 additions & 45 deletions docs/examples/examples_h5writing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -38,6 +38,16 @@
"import npy2bdv"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# optional: upgrade to latest version\n",
"! pip install -U npy2bdv "
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -50,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -60,20 +70,17 @@
" y = np.linspace(-3, 3, dim_yx[0])\n",
" sigma = 1.0 - abs(iz - nz/2) / nz\n",
" x, y = np.meshgrid(x, y)\n",
" return 65535 * np.exp(- ((x ** 2) + (y ** 2)) / (2 * sigma**2) )\n",
" return (65535 * np.exp(- ((x ** 2) + (y ** 2)) / (2 * sigma**2) )).astype('uint16')\n",
"\n",
"def generate_test_stack(nz, ny, nx):\n",
" stack = np.empty((nz, ny, nx))\n",
" for z in range(nz):\n",
" stack[z, :, :] = generate_test_image((ny, nx), z, nz)\n",
" return stack\n",
" return stack.astype('uint16')\n",
" \n",
"examples_dir = \"./example_files/\"\n",
"if not os.path.exists(examples_dir):\n",
" os.mkdir(examples_dir)\n",
"\n",
"nz, ny, nx = 50, 1024, 2048\n",
"stack = generate_test_stack(nz, ny, nx)"
" os.mkdir(examples_dir)"
]
},
{
Expand All @@ -88,20 +95,22 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dataset in ./example_files/ex1_t2_ch2_illum2_angle2_raw.h5\n",
"Wall time: 6.19 s\n"
"Wall time: 9.03 s\n"
]
}
],
"source": [
"%%time\n",
"nz, ny, nx = 50, 1024, 2048\n",
"stack = generate_test_stack(nz, ny, nx)\n",
"fname = examples_dir + \"ex1_t2_ch2_illum2_angle2_raw.h5\"\n",
"bdv_writer = npy2bdv.BdvWriter(fname, nchannels=2, nilluminations=2, nangles=2)\n",
"\n",
Expand All @@ -126,50 +135,90 @@
"source": [
"## 2. Writing speed test\n",
"\n",
"Speed test for raw data writing, 20 time points and 2 channels. File size is 8 GB.\n",
"\n",
"Note: For some reason, stacks created in `float64` format are written **5x faster** than stacks in `uint16`. Storage in H5 is `uint16` in both cases, this is unexpected speed difference. Todo: look into h5py for details."
"Speed test for raw data writing, 20 time points and 2 channels. File size is 17 GB."
]
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"H5 mean writing time per stack: 0.397 sec.\n",
"H5 mean writing speed: 2111 MB/s\n",
"clock on: 1721819333\n",
"clock off: 1721819398\n",
"H5 mean writing time per stack: 0.046 sec.\n",
"H5 mean writing speed: 9042 MB/s\n",
"dataset in ./example_files/ex2_t20_chan2.h5\n"
]
}
],
"source": [
"ntimes = 20\n",
"nchannels = 2\n",
"start_time_total = time.time()\n",
"i_stacks = 0\n",
"time_list = []\n",
"nt, nc, nz, ny, nx = 20, 2, 50, 2048, 2048\n",
"stack = generate_test_stack(nz, ny, nx)\n",
"\n",
"fname = examples_dir + \"ex2_t20_chan2.h5\"\n",
"bdv_writer = npy2bdv.BdvWriter(fname, nchannels=2)\n",
"for ichannel in range(nchannels):\n",
" for itime in range(ntimes):\n",
"bdv_writer = npy2bdv.BdvWriter(fname, nchannels=nc)\n",
"\n",
"start_time = time.time()\n",
"print('clock on:',int(start_time))\n",
"\n",
"for ichannel in range(nc):\n",
" for itime in range(nt):\n",
" start_time = time.time()\n",
" bdv_writer.append_view(stack, time=itime, channel=ichannel)\n",
" time_interval = time.time() - start_time\n",
" time_list.append(time_interval)\n",
" i_stacks += 1.0\n",
"\n",
"bdv_writer.write_xml()\n",
"bdv_writer.close()\n",
"time_per_stack = (time.time() - start_time_total) / i_stacks\n",
"\n",
"stop_time = time.time()\n",
"print('clock off:', int(stop_time))\n",
"time_per_stack = (stop_time - start_time) / (nt*nc)\n",
"\n",
"print(f\"H5 mean writing time per stack: {time_per_stack:1.3f} sec.\")\n",
"print(f\"H5 mean writing speed: {int(sys.getsizeof(stack) / time_per_stack / 1e6)} MB/s\")\n",
"print(f\"dataset in {fname}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Warning:** This speed should not be trusted: time variables `start_time` and `stop_time` change during cell execution, see below:"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"clock on: 1721819396\n",
"clock off: 1721819398\n"
]
},
{
"data": {
"text/plain": [
"1.855459213256836"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print('clock on:',int(start_time))\n",
"print('clock off:', int(stop_time))\n",
"stop_time - start_time"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -467,30 +516,40 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"virtual stack in ./example_files/ex6_t1_ch2_virtual_by_plane.h5\n"
"H5 mean writing speed: 216 MB/s\n",
"H5 mean writing time per plane: 0.039 sec.\n",
"virtual stack in ./example_files/ex6_t1_ch2_virtual_by_plane.h5\n",
"Wall time: 31.1 s\n"
]
}
],
"source": [
"nz, ny, nx = 250, 1024, 2048\n",
"%%time\n",
"nc, nz, ny, nx = 2, 400, 2048, 2048\n",
"test_image = generate_test_image((ny, nx), nz/2, nz)\n",
"fname = examples_dir + \"ex6_t1_ch2_virtual_by_plane.h5\"\n",
"bdv_writer = npy2bdv.BdvWriter(fname, nchannels=2, blockdim=((1, 256, 256),))\n",
"\n",
"for i_ch in range(2):\n",
"start_time = time.time()\n",
"for i_ch in range(nc):\n",
" bdv_writer.append_view(stack=None, virtual_stack_dim=(nz,ny,nx), time=0, channel=i_ch)\n",
" for z in range(nz):\n",
" bdv_writer.append_plane(plane=test_image, z=z, time=0, channel=i_ch)\n",
"\n",
"bdv_writer.write_xml()\n",
"bdv_writer.close()\n",
"\n",
"time_total = time.time() - start_time\n",
"ave_time_per_plane = time_total/(nc*nz)\n",
"print(f\"H5 mean writing speed: {int(sys.getsizeof(test_image)*nc*nz / time_total / 1e6)} MB/s\")\n",
"print(f\"H5 mean writing time per plane: {ave_time_per_plane:1.3f} sec.\")\n",
"print(f\"virtual stack in {fname}\")"
]
},
Expand All @@ -503,30 +562,36 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"virtual stack in ./example_files/ex6_t1_ch2_virtual_by_substack.h5\n"
"clock started\n",
"H5 mean writing speed: 325 MB/s\n",
"H5 mean writing time per plane: 0.026 sec.\n",
"virtual stack in ./example_files/ex6_t1_ch2_virtual_by_substack.h5\n",
"Wall time: 2min 3s\n"
]
}
],
"source": [
"nz, ny, nx, n_substacks = 250, 1024, 2048, 4\n",
"%%time\n",
"nc, nz, ny, nx, n_substacks = 2, 400, 2048, 2048, 4\n",
"stack = generate_test_stack(nz, ny, nx)\n",
"fname = examples_dir + \"ex6_t1_ch2_virtual_by_substack.h5\"\n",
"bdv_writer = npy2bdv.BdvWriter(fname, nchannels=2, blockdim=((32, 32, 32),))\n",
"\n",
"print('clock started')\n",
"start_time = time.time()\n",
"#initialize virtual stacks\n",
"for i_ch in range(2):\n",
"for i_ch in range(nc):\n",
" bdv_writer.append_view(stack=None, virtual_stack_dim=(nz,ny,nx), time=0, channel=i_ch)\n",
"\n",
"# populate virtual stacks \n",
"for i_ch in range(2):\n",
" bdv_writer.append_plane(plane=test_image, z=z, time=0, channel=i_ch)\n",
"for i_ch in range(nc):\n",
" for isub in range(n_substacks):\n",
" zslice = slice(isub*(nz//n_substacks), (isub+1)*(nz//n_substacks))\n",
" bdv_writer.append_substack(substack=stack[zslice, :, :],\n",
Expand All @@ -535,6 +600,12 @@
"\n",
"bdv_writer.write_xml()\n",
"bdv_writer.close()\n",
"\n",
"time_total = time.time() - start_time\n",
"ave_time_per_plane = time_total/(nc*nz)\n",
"\n",
"print(f\"H5 mean writing speed: {int(sys.getsizeof(stack) * nc / time_total / 1e6)} MB/s\")\n",
"print(f\"H5 mean writing time per plane: {ave_time_per_plane:1.3f} sec.\")\n",
"print(f\"virtual stack in {fname}\")"
]
},
Expand All @@ -555,34 +626,46 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO: blockdim levels (1) < subsamp levels (3): First-level block size (1, 128, 128) will be used for all levels\n",
"virtual stack with subsampling is in ./example_files/ex6a_t1_ch1_virtual_by_plane_subsamp.h5\n"
"clock started\n",
"H5 mean writing speed: 161 MB/s\n",
"H5 mean writing time per plane: 0.052 sec.\n",
"virtual stack in ./example_files/ex6a_t1_ch1_virtual_by_plane_subsamp.h5\n",
"Wall time: 41.8 s\n"
]
}
],
"source": [
"nz, ny, nx = 50, 1024, 2048\n",
"%%time\n",
"nc, nz, ny, nx = 2, 400, 2048, 2048\n",
"test_image = generate_test_image((ny, nx), nz/2, nz)\n",
"fname = examples_dir + \"ex6a_t1_ch1_virtual_by_plane_subsamp.h5\"\n",
"bdv_writer = npy2bdv.BdvWriter(fname, nchannels=2,\n",
" blockdim=((1, 128, 128),),\n",
" subsamp=((1, 1, 1), (1, 4, 4), (1, 8, 8)))\n",
"\n",
"for i_ch in range(2):\n",
"print('clock started')\n",
"start_time = time.time()\n",
"for i_ch in range(nc):\n",
" bdv_writer.append_view(stack=None, virtual_stack_dim=(nz,ny,nx), time=0, channel=i_ch)\n",
" for z in range(nz):\n",
" bdv_writer.append_plane(plane=test_image, z=z, time=0, channel=i_ch)\n",
"\n",
"bdv_writer.write_xml()\n",
"bdv_writer.close()\n",
"print(f\"virtual stack with subsampling is in {fname}\")"
"\n",
"time_total = time.time() - start_time\n",
"ave_time_per_plane = time_total/(nc*nz)\n",
"\n",
"print(f\"H5 mean writing speed: {int(sys.getsizeof(test_image) * nc * nz / time_total / 1e6)} MB/s\")\n",
"print(f\"H5 mean writing time per plane: {ave_time_per_plane:1.3f} sec.\")\n",
"print(f\"virtual stack in {fname}\")"
]
},
{
Expand Down Expand Up @@ -664,7 +747,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand Down

0 comments on commit 18c4ac8

Please sign in to comment.