diff --git a/CHANGELOG.md b/CHANGELOG.md index 649491c..6125867 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 1.1.0 (unreleased) +## Unreleased -Memory blowing up on formatting? Try `sequential_output/2`! ### Features -* Introduce `sequential_output/2` which you can can call as a function in formatters. Instead of formatting _everything_ first and then writing it out it will format one HTML file and immediately write it out freeing it up for Garabage Collection. This can lead to huge max memory used savings (12 GB --> 7 GB in a bigger benchmark I ran). * Graphing library (plotly.js) updated, seems to work graphs may look slightly different. ## 1.0.0 (2019-03-28) diff --git a/README.md b/README.md index 6d9ec6a..33382b1 100644 --- a/README.md +++ b/README.md @@ -70,28 +70,6 @@ When you hover the graphs in the HTML report, quite some plotly.js controls and Be aware, that currently when too many samples are recorded (> 100_000 usually) rendering might break as plotly can't handle all that data. See [this issue](https://github.com/PragTob/benchee_html/issues/3) on how to quick fix it and what could be done in the future. -### Too much memory consumption? - -Due to the way that formatters are designed to first `format/2` everything, which can be done in parallel across formatters, and then `output/2` it the formatter can be quite memory hungry. This is due to the fact, that it means all files need to be held in memory before writing them out. Most times, this should not be an issue - however if you run a benchmark with a lot of scenarios and samples it _can_ be. Hence, there is `sequential_output/2` which produces the same output but formats a file and immediately writes it out. - -You can use it as a function: - -```elixir -list = Enum.to_list(1..10_000) -map_fun = fn i -> [i, i * i] end - -Benchee.run( - %{ - "flat_map" => fn -> Enum.flat_map(list, map_fun) end, - "map.flatten" => fn -> list |> Enum.map(map_fun) |> List.flatten() end - }, - formatters: [ - # this is the important bit - fn suite -> Benchee.Formatters.HTML.sequential_output(suite, auto_open: false) end - ] -) -``` - ## PNG image export/download When you hover the graph the controls appear and the left most of those is a camera and says "Download plot as png" - and it does what you'd expect. Refer to the image below if you need more guidance :) diff --git a/lib/benchee/formatters/html.ex b/lib/benchee/formatters/html.ex index 971fc5f..97fef8a 100644 --- a/lib/benchee/formatters/html.ex +++ b/lib/benchee/formatters/html.ex @@ -97,56 +97,6 @@ defmodule Benchee.Formatters.HTML do :ok end - @doc """ - Formats and prints out files sequentially fo consume less memory. - - Benchee loves to do things in parallel, which is usually great, less so if you have a gigantic benchmark though. - By default, benchee's formatters first format everything in parallel (see `format/2`) - which means all that data - needs to be kept in memory. Only a second step benchee writes the results out (see `write/2`). - - This (optional) function is supposed to format something and write it out immediately. For a formatter like HTML - that might write out 30 files or so, this should signficantly reduce memory consumption. - """ - @spec sequential_output(Suite.t(), map() | keyword()) :: :ok - def sequential_output( - %Suite{ - scenarios: scenarios, - system: system, - configuration: %Configuration{unit_scaling: unit_scaling} - }, - opts - ) do - ensure_applications_loaded() - opts = Benchee.Utility.DeepConvert.to_map(opts) - - %{file: filename, auto_open: auto_open?, inline_assets: inline_assets?} = - merge_default_configuration(opts) - - prepare_folder_structure(filename, inline_assets?) - - # descriptors is a variant for the modifiers applied to the file on creation - input_to_descriptors = - scenarios - |> Enum.group_by(fn scenario -> scenario.input_name end) - |> Enum.map(fn input_to_scenarios = {input_name, _scenarios} -> - file_descriptors = - write_reports_for_input( - input_to_scenarios, - system, - filename, - unit_scaling, - inline_assets? - ) - - {input_name, file_descriptors} - end) - - write_index(input_to_descriptors, filename, system, inline_assets?) - - if auto_open?, do: open_report(filename) - :ok - end - defp ensure_applications_loaded do _ = Application.load(:benchee) _ = Application.load(:benchee_html) @@ -221,39 +171,6 @@ defmodule Benchee.Formatters.HTML do Render.comparison(input_name, input_suite, units, scenarios_json, inline_assets)} end - defp write_reports_for_input( - {input_name, scenarios}, - system, - filename, - unit_scaling, - inline_assets - ) do - units = Conversion.units(scenarios, unit_scaling) - - scenario_descriptors = - Enum.map(scenarios, fn scenario -> - report = {descriptors, _content} = scenario_report(scenario, system, units, inline_assets) - create_single_file(report, filename) - - descriptors - end) - - comparison_report = - comparison_report(input_name, scenarios, system, filename, units, inline_assets) - - create_single_file(comparison_report, filename) - - {comparison_descriptors, _content} = comparison_report - - [comparison_descriptors | scenario_descriptors] - end - - defp create_single_file(report, filename) do - # yes wrapping this may feel overdone but provides an easy switch - # if we introduce a nicer utility to Benchee - FileCreation.each([report], filename) - end - defp build_index(input_to_descriptors, filename, system, inline_assets?) do full_index_data = build_index_data(input_to_descriptors, filename) @@ -269,13 +186,6 @@ defmodule Benchee.Formatters.HTML do end) end - defp write_index(input_to_descriptors, filename, system, inline_assets?) do - index_entry = build_index(input_to_descriptors, filename, system, inline_assets?) - create_single_file(index_entry, filename) - - :ok - end - defp open_report(filename) do browser = get_browser() {_, exit_code} = System.cmd(browser, [filename]) diff --git a/samples/fast_sequential.exs b/samples/fast_sequential.exs deleted file mode 100644 index 26c4e99..0000000 --- a/samples/fast_sequential.exs +++ /dev/null @@ -1,15 +0,0 @@ -list = Enum.to_list(1..10_000) -map_fun = fn i -> [i, i * i] end - -Benchee.run( - %{ - "flat_map" => fn -> Enum.flat_map(list, map_fun) end, - "map.flatten" => fn -> list |> Enum.map(map_fun) |> List.flatten() end - }, - formatters: [ - fn suite -> Benchee.Formatters.HTML.sequential_output(suite, auto_open: false) end - ], - time: 0.05, - memory_time: 0.05, - warmup: 0.01 -) diff --git a/test/benchee/formatters/html_integration_test.exs b/test/benchee/formatters/html_integration_test.exs index 5b7bf78..82f5e8c 100644 --- a/test/benchee/formatters/html_integration_test.exs +++ b/test/benchee/formatters/html_integration_test.exs @@ -84,28 +84,6 @@ defmodule Benchee.Formatters.HTMLIntegrationTest do basic_test(benchee_options, assertion_data, run_time: false) end - test "works just fine using sequential_output" do - benchee_options = [ - time: 0.01, - memory_time: 0.01, - warmup: 0.02, - formatters: [ - fn suite -> - Benchee.Formatters.HTML.sequential_output(suite, file: @index_path, auto_open: false) - end - ] - ] - - assertion_data = %{ - comparison_path: @comparison_path, - test_directory: @test_directory, - index_path: @index_path, - base_name: @base_name - } - - basic_test(benchee_options, assertion_data, run_time: true) - end - test "doesn't crash if we're essentially measuring nothing" do capture_io(fn -> assert %Benchee.Suite{} =