diff --git a/scripts/cxx_codegen/profdata_merger/profdata_merger.cpp b/scripts/cxx_codegen/profdata_merger/profdata_merger.cpp
index 7120ea652..80a16aee6 100644
--- a/scripts/cxx_codegen/profdata_merger/profdata_merger.cpp
+++ b/scripts/cxx_codegen/profdata_merger/profdata_merger.cpp
@@ -22,7 +22,8 @@
 #include
 #include
 #include
-
+#include
+#include
 #include
@@ -765,16 +766,18 @@ struct queries {
         SqlInsert(
             "CovSegment",
             {
-                "StartLine",    // 1
-                "StartCol",     // 2
-                "EndLine",      // 3
-                "EndCol",       // 4
-                "StartCount",   // 5
-                "EndCount",     // 6
-                "HasCount",     // 7
-                "File",         // 8
-                "Context",      // 9
-                "SegmentIndex", // 10
+                "Id",           // 1
+                "StartLine",    // 2
+                "StartCol",     // 3
+                "EndLine",      // 4
+                "EndCol",       // 5
+                "StartCount",   // 6
+                "EndCount",     // 7
+                "HasCount",     // 8
+                "File",         // 9
+                "Context",      // 10
+                "SegmentIndex", // 11
+                "NestedIn",     // 12
             }))
         , // ---
@@ -1031,6 +1034,51 @@ int get_region_id(
     return ctx.function_region_ids.at(r);
 }
 
+
+template <typename T, typename FormatContext>
+auto fmt_ctx_field(
+    std::string const& field_name,
+    T const&           field_value,
+    FormatContext&     ctx) {
+    fmt_ctx(" ", ctx);
+    fmt_ctx(field_name, ctx);
+    fmt_ctx(" = ", ctx);
+    return fmt_ctx(field_value, ctx);
+}
+
+template <>
+struct std::formatter<CoverageSegment> : std::formatter {
+    template <typename FormatContext>
+    auto format(const CoverageSegment& p, FormatContext& ctx) const {
+        fmt_ctx("{", ctx);
+        fmt_ctx_field("Line", p.Line, ctx);
+        fmt_ctx_field("Col", p.Col, ctx);
+        fmt_ctx_field("Count", p.Count, ctx);
+        fmt_ctx_field("HasCount", p.HasCount, ctx);
+        fmt_ctx_field("IsRegionEntry", p.IsRegionEntry, ctx);
+        fmt_ctx_field("IsGapRegion", p.IsGapRegion, ctx);
+        return fmt_ctx(" }", ctx);
+    }
+};
+
+template <typename T>
+std::string format_range(T begin, T end) {
+    bool        isFirst = true;
+    std::string result  = "[";
+    while (begin != end) {
+        if (isFirst) {
+            isFirst = false;
+        } else {
+            result += ", ";
+        }
+        result += std::format("{}", *begin);
+        ++begin;
+    }
+
+    result += "]";
+    return result;
+}
+
 void add_file(CoverageData const& file, queries& q, db_build_ctx& ctx) {
     TRACE_EVENT("sql", "File coverage data");
     int file_id = ctx.get_file_id(file.getFilename().str(), q);
@@ -1047,10 +1095,12 @@ void add_file(CoverageData const& file, queries& q, db_build_ctx& ctx) {
     for (auto it : enumerate(file)) {
         CoverageSegment const& s = it.value();
+        std::string prefix = std::string(segment_stack.size() * 2, ' ');
         if (s.IsRegionEntry) {
+            ++ctx.segment_counter;
             segment_stack.push({
                 .segment = s,
-                .self_id = ++ctx.segment_counter,
+                .self_id = ctx.segment_counter,
                 .parent  = std::nullopt,
             });
         } else {
@@ -1077,20 +1127,34 @@
         q.segment_flat.reset();
     }
 
+    std::sort(
+        segment_pairs.begin(),
+        segment_pairs.end(),
+        [](std::pair const& lhs,
+           std::pair const& rhs) -> bool {
+            return lhs.first.self_id < rhs.first.self_id;
+        });
+
     for (auto it : enumerate(segment_pairs)) {
         auto const& [nesting, end] = it.value();
         auto const& start          = nesting.segment;
-        q.segment.bind(1, start.Line);
-        q.segment.bind(2, start.Col);
-        q.segment.bind(3, end.Line);
-        q.segment.bind(4, end.Col);
-        q.segment.bind(5, (int64_t)start.Count);
-        q.segment.bind(6, (int64_t)end.Count);
-        q.segment.bind(7, start.HasCount || end.HasCount);
-        q.segment.bind(8, file_id);
-        q.segment.bind(9, ctx.context_id);
-        q.segment.bind(10, (int)it.index());
+        q.segment.bind(1, nesting.self_id);
+        q.segment.bind(2, start.Line);
+        q.segment.bind(3, start.Col);
+        q.segment.bind(4, end.Line);
+        q.segment.bind(5, end.Col);
+        q.segment.bind(6, (int64_t)start.Count);
+        q.segment.bind(7, (int64_t)end.Count);
+        q.segment.bind(8, start.HasCount || end.HasCount);
+        q.segment.bind(9, file_id);
+        q.segment.bind(10, ctx.context_id);
+        q.segment.bind(11, (int)it.index());
+        if (nesting.parent) {
+            q.segment.bind(12, *nesting.parent);
+        } else {
+            q.segment.bind(12, nullptr);
+        }
         q.segment.exec();
         q.segment.reset();
     }
diff --git a/scripts/cxx_codegen/profdata_merger/profdata_merger.sql b/scripts/cxx_codegen/profdata_merger/profdata_merger.sql
index eef204a2f..5347c3be0 100644
--- a/scripts/cxx_codegen/profdata_merger/profdata_merger.sql
+++ b/scripts/cxx_codegen/profdata_merger/profdata_merger.sql
@@ -102,9 +102,11 @@ CREATE TABLE "CovSegment" (
     "File" INTEGER NOT NULL,
     "Context" INTEGER NOT NULL,
     "SegmentIndex" INTEGER NOT NULL,
+    "NestedIn" INTEGER,
     PRIMARY KEY ("Id"),
     FOREIGN KEY("File") REFERENCES "CovFile" ("Id"),
-    FOREIGN KEY("Context") REFERENCES "CovContext" ("Id")
+    FOREIGN KEY("Context") REFERENCES "CovContext" ("Id"),
+    FOREIGN KEY("NestedIn") REFERENCES "CovSegment" ("Id")
 )
 ;
diff --git a/scripts/py_repository/py_repository/gen_coverage_cxx.py b/scripts/py_repository/py_repository/gen_coverage_cxx.py
index c4eeec0cd..89f89adb3 100755
--- a/scripts/py_repository/py_repository/gen_coverage_cxx.py
+++ b/scripts/py_repository/py_repository/gen_coverage_cxx.py
@@ -96,6 +96,7 @@ class CovSegmentFlat(CoverageSchema):
     Context = ForeignId(CovContext.Id)
     SegmentIndex = IntColumn()
 
+
 class CovSegment(CoverageSchema):
     __tablename__ = "CovSegment"
     Id = IdColumn()
@@ -109,6 +110,7 @@ class CovSegment(CoverageSchema):
     File = ForeignId(CovFile.Id)
     Context = ForeignId(CovContext.Id)
     SegmentIndex = IntColumn()
+    NestedIn = ForeignId("CovSegment.Id", nullable=True)
 
 
 class CovInstantiationGroup(CoverageSchema):
@@ -160,7 +162,7 @@ def extract_text(lines: List[str], start: Tuple[int, int], end: Tuple[int, int])
 
     if start_line == end_line:
         return lines[start_line - 1][start_column - 1:end_column - 1]
-    
+
     else:
         extracted_lines = [
            lines[start_line - 1][start_column - 1:]
diff --git a/scripts/py_scriptutils/py_scriptutils/pandas_utils.py b/scripts/py_scriptutils/py_scriptutils/pandas_utils.py
index 1a86bb4a4..58fdcccb6 100644
--- a/scripts/py_scriptutils/py_scriptutils/pandas_utils.py
+++ b/scripts/py_scriptutils/py_scriptutils/pandas_utils.py
@@ -1,6 +1,6 @@
 import pandas as pd
 from rich.table import Table
-from beartype.typing import List
+from beartype.typing import List, Dict, Union
 import rich.box
 from beartype import beartype
 import py_scriptutils.json_utils as ju
@@ -32,12 +32,30 @@ def dataframe_to_rich_table(df: pd.DataFrame, exclude_columns: List[str] = []) -
     return table
 
 
+def dataframe_from_dict_list(
+    column_names: Union[List[str], pd.DataFrame],
+    data_dicts: List[Dict[str, any]],
+) -> pd.DataFrame:
+    if isinstance(column_names, pd.DataFrame):
+        column_names = column_names.columns.tolist()
+
+    data_prepared = [{col: d.get(col, None) for col in column_names} for d in data_dicts]
+    df = pd.DataFrame(data_prepared, columns=column_names)
+    return df
+
+
 @beartype
 def assert_frame(df: pd.DataFrame, subset: ju.Json):
-    render = dataframe_to_rich_table(df)
-    render.box = rich.box.ASCII
+    given_dataframe = dataframe_to_rich_table(df)
+    given_dataframe.box = rich.box.ASCII
+    df2 = dataframe_from_dict_list(df, subset)
+    expected_dataframe = dataframe_to_rich_table(df2)
+    expected_dataframe.box = rich.box.ASCII
     ju.assert_subset(
         df.to_dict("records"),
         subset,
-        message=render_rich(render, color=False),
+        message="\nGiven dataframe:\n{}\nExpected dataframe:\n{}".format(
+            render_rich(given_dataframe, color=False),
+            render_rich(expected_dataframe, color=False),
+        ),
     )
diff --git a/tests/python/repo/coverage_corpus/test_file_segmentation_2.cpp b/tests/python/repo/coverage_corpus/test_file_segmentation_2.cpp
new file mode 100644
index 000000000..525b0a42e
--- /dev/null
+++ b/tests/python/repo/coverage_corpus/test_file_segmentation_2.cpp
@@ -0,0 +1,5 @@
+void action() {}
+
+int main() {
+    if (true || false) { action(); }
+}
diff --git a/tests/python/repo/test_code_coverage.py b/tests/python/repo/test_code_coverage.py
index 5b9857c61..e588039b8 100644
--- a/tests/python/repo/test_code_coverage.py
+++ b/tests/python/repo/test_code_coverage.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 from tempfile import TemporaryDirectory
 
+import re
 import pandas as pd
 import py_repository.gen_coverage_cxx as cov
 from beartype import beartype
@@ -296,11 +297,16 @@ def test_file_coverage_filter():
     ])
 
 
+@beartype
+def cleanup_test_code(code: str) -> str:
+    return re.sub(r"\s+", " ", code.replace("\n", " "))
+
+
 def test_file_segmentation():
     with TemporaryDirectory() as tmp:
         dir = Path(tmp)
         dir = Path("/tmp/test_base_run_coverage")
-        code = corpus_base.joinpath("test_file_segmentation.cpp").read_text()
+        code = corpus_base.joinpath("test_file_segmentation_2.cpp").read_text()
         cmd = ProfileRunParams(dir=dir, main="main.cpp", files={"main.cpp": code})
         cmd.run()
@@ -312,22 +318,61 @@
         df = pd.read_sql(select(cov.CovSegment), session.get_bind())
 
         df["Text"] = df.apply(
-            lambda row: cov.extract_text(
-                lines,
-                start=(row["StartLine"], row["StartCol"]),
-                end=(row["EndLine"], row["EndCol"]),
-            ),
+            lambda row: cleanup_test_code(
+                cov.extract_text(
+                    lines,
+                    start=(row["StartLine"], row["StartCol"]),
+                    end=(row["EndLine"], row["EndCol"]),
+                )),
             axis=1,
         )
 
         table = dataframe_to_rich_table(df)
-        table.box = rich.box.ASCII2
         table.show_lines = True
         Path("/tmp/regions.txt").write_text(render_rich(table, color=False))
 
         segment_df = pd.read_sql(select(cov.CovSegmentFlat), session.get_bind())
         segment_df["Text"] = segment_df["Line"].map(lambda it: lines[it - 1])
         table = dataframe_to_rich_table(segment_df)
-        table.box = rich.box.ASCII2
         table.show_lines = True
         Path("/tmp/segments.txt").write_text(render_rich(table, color=False))
+
+        assert_frame(df, [
+            dict(
+                StartLine=1,
+                EndLine=1,
+                SegmentIndex=0,
+                Text="{}",
+            ),
+            dict(
+                StartLine=3,
+                EndLine=5,
+                SegmentIndex=1,
+                Id=2,
+                Text="{ if (true || false) { action(); } }",
+            ),
+            dict(
+                StartLine=4,
+                EndLine=4,
+                SegmentIndex=2,
+                Text="true",
+                StartCol=9,
+                EndCol=13,
+                NestedIn=2,
+            ),
+            dict(
+                StartLine=4,
+                EndLine=4,
+                SegmentIndex=3,
+                Text="false",
+                StartCol=17,
+                EndCol=22,
+                NestedIn=2,
+            ),
+            dict(
+                StartLine=4,
+                SegmentIndex=4,
+                Text="{ action(); }",
+                NestedIn=2,
+            ),
+        ])
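
The central change in this patch is the nullable NestedIn column on CovSegment: a segment that opens inside another region-entry segment records the Id of that enclosing segment, which is what the NestedIn=2 expectations in test_file_segmentation assert. As a usage illustration only (not part of the patch), the sketch below groups segment rows by that parent id. It assumes an already-open SQLAlchemy session over the coverage database, created the same way as in test_code_coverage.py, and segment_children is a hypothetical helper name.

# Minimal sketch, not part of the patch: group CovSegment rows by the new
# NestedIn column. Assumes `session` is an open SQLAlchemy session over the
# coverage database, set up as in test_code_coverage.py.
from collections import defaultdict

import pandas as pd
import py_repository.gen_coverage_cxx as cov
from sqlalchemy import select


def segment_children(session) -> dict:
    # Hypothetical helper: maps a parent segment Id (None for top-level
    # segments, where NestedIn is NULL) to the list of child segment Ids.
    df = pd.read_sql(select(cov.CovSegment), session.get_bind())
    children = defaultdict(list)
    for row in df.to_dict("records"):
        parent = row["NestedIn"]
        key = None if pd.isna(parent) else int(parent)
        children[key].append(int(row["Id"]))
    return dict(children)

For test_file_segmentation_2.cpp this would yield roughly {None: [1, 2], 2: [3, 4, 5]}, matching the NestedIn=2 rows asserted in the test; the child Ids are illustrative, since the test only pins Id=2 for the main-function segment.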