test: coverage file segmentation retrieval
haxscramper committed Apr 21, 2024
1 parent f6005cb commit e49d4c5
Showing 6 changed files with 172 additions and 36 deletions.
108 changes: 86 additions & 22 deletions scripts/cxx_codegen/profdata_merger/profdata_merger.cpp
@@ -22,7 +22,8 @@
#include <unordered_map>
#include <boost/describe.hpp>
#include <llvm/ADT/Hashing.h>

#include <hstd/system/Formatter.hpp>
#include <algorithm>

#include <perfetto.h>

@@ -765,16 +766,18 @@ struct queries {
SqlInsert(
"CovSegment",
{
"StartLine", // 1
"StartCol", // 2
"EndLine", // 3
"EndCol", // 4
"StartCount", // 5
"EndCount", // 6
"HasCount", // 7
"File", // 8
"Context", // 9
"SegmentIndex", // 10
"Id", // 1
"StartLine", // 2
"StartCol", // 3
"EndLine", // 4
"EndCol", // 5
"StartCount", // 6
"EndCount", // 7
"HasCount", // 8
"File", // 9
"Context", // 10
"SegmentIndex", // 11
"NestedIn", // 12
}))
,
// ---
@@ -1031,6 +1034,51 @@ int get_region_id(
return ctx.function_region_ids.at(r);
}


template <typename T, typename FormatContext>
auto fmt_ctx_field(
std::string const& field_name,
T const& field_value,
FormatContext& ctx) {
fmt_ctx(" ", ctx);
fmt_ctx(field_name, ctx);
fmt_ctx(" = ", ctx);
return fmt_ctx(field_value, ctx);
}

template <>
struct std::formatter<CoverageSegment> : std::formatter<std::string> {
template <typename FormatContext>
auto format(const CoverageSegment& p, FormatContext& ctx) const {
fmt_ctx("{", ctx);
fmt_ctx_field("Line", p.Line, ctx);
fmt_ctx_field("Col", p.Col, ctx);
fmt_ctx_field("Count", p.Count, ctx);
fmt_ctx_field("HasCount", p.HasCount, ctx);
fmt_ctx_field("IsRegionEntry", p.IsRegionEntry, ctx);
fmt_ctx_field("IsGapRegion", p.IsGapRegion, ctx);
return fmt_ctx(" }", ctx);
}
};

template <typename T>
std::string format_range(T begin, T end) {
bool isFirst = true;
std::string result = "[";
while (begin != end) {
if (isFirst) {
isFirst = false;
} else {
result += ", ";
}
result += std::format("{}", *begin);
++begin;
}

result += "]";
return result;
}

void add_file(CoverageData const& file, queries& q, db_build_ctx& ctx) {
TRACE_EVENT("sql", "File coverage data");
int file_id = ctx.get_file_id(file.getFilename().str(), q);
@@ -1047,10 +1095,12 @@ void add_file(CoverageData const& file, queries& q, db_build_ctx& ctx) {

for (auto it : enumerate(file)) {
CoverageSegment const& s = it.value();
std::string prefix = std::string(segment_stack.size() * 2, ' ');
if (s.IsRegionEntry) {
++ctx.segment_counter;
segment_stack.push({
.segment = s,
.self_id = ++ctx.segment_counter,
.self_id = ctx.segment_counter,
.parent = std::nullopt,
});
} else {
@@ -1077,20 +1127,34 @@
q.segment_flat.reset();
}

std::sort(
segment_pairs.begin(),
segment_pairs.end(),
[](std::pair<NestingData, CoverageSegment> const& lhs,
std::pair<NestingData, CoverageSegment> const& rhs) -> bool {
return lhs.first.self_id < rhs.first.self_id;
});

for (auto it : enumerate(segment_pairs)) {
auto const& [nesting, end] = it.value();
auto const& start = nesting.segment;

q.segment.bind(1, start.Line);
q.segment.bind(2, start.Col);
q.segment.bind(3, end.Line);
q.segment.bind(4, end.Col);
q.segment.bind(5, (int64_t)start.Count);
q.segment.bind(6, (int64_t)end.Count);
q.segment.bind(7, start.HasCount || end.HasCount);
q.segment.bind(8, file_id);
q.segment.bind(9, ctx.context_id);
q.segment.bind(10, (int)it.index());
q.segment.bind(1, nesting.self_id);
q.segment.bind(2, start.Line);
q.segment.bind(3, start.Col);
q.segment.bind(4, end.Line);
q.segment.bind(5, end.Col);
q.segment.bind(6, (int64_t)start.Count);
q.segment.bind(7, (int64_t)end.Count);
q.segment.bind(8, start.HasCount || end.HasCount);
q.segment.bind(9, file_id);
q.segment.bind(10, ctx.context_id);
q.segment.bind(11, (int)it.index());
if (nesting.parent) {
q.segment.bind(12, *nesting.parent);
} else {
q.segment.bind(12, nullptr);
}
q.segment.exec();
q.segment.reset();
}
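
Note (illustrative, not part of the commit): the hunks above only show the push half of the nesting bookkeeping in add_file() — region-entry segments receive a fresh self_id and go onto segment_stack, and the sorted segment_pairs are later written out with their parent id bound to the new NestedIn column. The pop/parent-assignment code is collapsed in the diff, so the Python sketch below is an assumed reconstruction; Segment and pair_segments are hypothetical names.

from dataclasses import dataclass
from typing import List, Optional, Tuple

@dataclass
class Segment:                      # minimal stand-in for llvm::coverage::CoverageSegment
    line: int
    col: int
    count: int
    is_region_entry: bool

def pair_segments(segments: List[Segment]) -> List[Tuple[int, Optional[int], Segment, Segment]]:
    # Returns (self_id, parent_id, start, end) rows, one per CovSegment insert.
    counter = 0
    stack: List[Tuple[int, Optional[int], Segment]] = []   # (self_id, parent_id, start segment)
    pairs: List[Tuple[int, Optional[int], Segment, Segment]] = []
    for seg in segments:
        if seg.is_region_entry:
            counter += 1
            parent = stack[-1][0] if stack else None        # assumed: parent is the enclosing entry
            stack.append((counter, parent, seg))
        elif stack:
            self_id, parent, start = stack.pop()            # a non-entry segment closes the region
            pairs.append((self_id, parent, start, seg))
    pairs.sort(key=lambda p: p[0])                          # mirrors the std::sort on self_id
    return pairs
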
4 changes: 3 additions & 1 deletion scripts/cxx_codegen/profdata_merger/profdata_merger.sql
@@ -102,9 +102,11 @@ CREATE TABLE "CovSegment" (
"File" INTEGER NOT NULL,
"Context" INTEGER NOT NULL,
"SegmentIndex" INTEGER NOT NULL,
"NestedIn" INTEGER,
PRIMARY KEY ("Id"),
FOREIGN KEY("File") REFERENCES "CovFile" ("Id"),
FOREIGN KEY("Context") REFERENCES "CovContext" ("Id")
FOREIGN KEY("Context") REFERENCES "CovContext" ("Id"),
FOREIGN KEY("NestedIn") REFERENCES "CovSegment" ("Id")
)

;
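
Note (illustrative, not part of the commit): the new NestedIn self-reference makes CovSegment hierarchical, so nesting can be walked with a recursive query. A minimal sketch against an already-merged SQLite database; the file path is made up:

import sqlite3

conn = sqlite3.connect("/tmp/coverage.sqlite")  # hypothetical path to a merged coverage database
rows = conn.execute("""
    WITH RECURSIVE nested(Id, StartLine, EndLine, NestedIn, Depth) AS (
        SELECT Id, StartLine, EndLine, NestedIn, 0
          FROM CovSegment WHERE NestedIn IS NULL
        UNION ALL
        SELECT s.Id, s.StartLine, s.EndLine, s.NestedIn, n.Depth + 1
          FROM CovSegment s JOIN nested n ON s.NestedIn = n.Id
    )
    SELECT Id, Depth, StartLine, EndLine, NestedIn FROM nested ORDER BY Id
""").fetchall()
for row in rows:
    print(row)   # each segment with its nesting depth relative to top-level segments
conn.close()
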
4 changes: 3 additions & 1 deletion scripts/py_repository/py_repository/gen_coverage_cxx.py
@@ -96,6 +96,7 @@ class CovSegmentFlat(CoverageSchema):
Context = ForeignId(CovContext.Id)
SegmentIndex = IntColumn()


class CovSegment(CoverageSchema):
__tablename__ = "CovSegment"
Id = IdColumn()
@@ -109,6 +110,7 @@ class CovSegment(CoverageSchema):
File = ForeignId(CovFile.Id)
Context = ForeignId(CovContext.Id)
SegmentIndex = IntColumn()
NestedIn = ForeignId("CovSegment.Id", nullable=True)


class CovInstantiationGroup(CoverageSchema):
@@ -160,7 +162,7 @@ def extract_text(lines: List[str], start: Tuple[int, int], end: Tuple[int, int])

if start_line == end_line:
return lines[start_line - 1][start_column - 1:end_column - 1]

else:
extracted_lines = [
lines[start_line - 1][start_column - 1:]
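
Note (illustrative, not part of the commit): extract_text addresses lines and columns 1-based, matching the StartLine/StartCol/EndLine/EndCol values stored in CovSegment; only the single-line branch is fully visible above. A standalone sketch of the same convention, with a hypothetical helper name:

from typing import List, Tuple

def extract_single_line(lines: List[str], start: Tuple[int, int], end: Tuple[int, int]) -> str:
    # 1-based (line, column) coordinates; the end column is exclusive.
    (start_line, start_col), (_end_line, end_col) = start, end
    return lines[start_line - 1][start_col - 1:end_col - 1]

lines = ["    if (true || false) { action(); }"]
print(extract_single_line(lines, (1, 9), (1, 13)))   # -> "true", matching StartCol=9/EndCol=13 asserted in the test below
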
26 changes: 22 additions & 4 deletions scripts/py_scriptutils/py_scriptutils/pandas_utils.py
@@ -1,6 +1,6 @@
import pandas as pd
from rich.table import Table
from beartype.typing import List
from beartype.typing import List, Dict, Union
import rich.box
from beartype import beartype
import py_scriptutils.json_utils as ju
@@ -32,12 +32,30 @@ def dataframe_to_rich_table(df: pd.DataFrame, exclude_columns: List[str] = []) -
return table


def dataframe_from_dict_list(
column_names: Union[List[str], pd.DataFrame],
data_dicts: List[Dict[str, any]],
) -> pd.DataFrame:
if isinstance(column_names, pd.DataFrame):
column_names = column_names.columns.tolist()

data_prepared = [{col: d.get(col, None) for col in column_names} for d in data_dicts]
df = pd.DataFrame(data_prepared, columns=column_names)
return df


@beartype
def assert_frame(df: pd.DataFrame, subset: ju.Json):
render = dataframe_to_rich_table(df)
render.box = rich.box.ASCII
given_dataframe = dataframe_to_rich_table(df)
given_dataframe.box = rich.box.ASCII
df2 = dataframe_from_dict_list(df, subset)
expected_dataframe = dataframe_to_rich_table(df2)
expected_dataframe.box = rich.box.ASCII
ju.assert_subset(
df.to_dict("records"),
subset,
message=render_rich(render, color=False),
message="\nGiven dataframe:\n{}\nExpected dataframe:\n{}".format(
render_rich(given_dataframe, color=False),
render_rich(expected_dataframe, color=False),
),
)
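
Note (illustrative, not part of the commit): dataframe_from_dict_list pads each expectation dict to the full column set so assert_frame can render the expected rows as a table next to the given one. A minimal usage sketch, assuming the py_scriptutils package is importable (module path inferred from the file path above):

import pandas as pd
from py_scriptutils.pandas_utils import dataframe_from_dict_list

df = pd.DataFrame([
    {"StartLine": 1, "EndLine": 1, "Text": "{}"},
    {"StartLine": 3, "EndLine": 5, "Text": "{ if (true || false) { action(); } }"},
])

# Columns missing from a dict are filled with None, so the expected table has the
# same column layout as the given dataframe.
expected = dataframe_from_dict_list(df, [{"StartLine": 1}, {"StartLine": 3, "Text": "..."}])
print(expected)
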
@@ -0,0 +1,5 @@
void action() {}

int main() {
if (true || false) { action(); }
}
61 changes: 53 additions & 8 deletions tests/python/repo/test_code_coverage.py
@@ -2,6 +2,7 @@
from pathlib import Path
from tempfile import TemporaryDirectory

import re
import pandas as pd
import py_repository.gen_coverage_cxx as cov
from beartype import beartype
@@ -296,11 +297,16 @@ def test_file_coverage_filter():
])


@beartype
def cleanup_test_code(code: str) -> str:
return re.sub(r"\s+", " ", code.replace("\n", " "))


def test_file_segmentation():
with TemporaryDirectory() as tmp:
dir = Path(tmp)
dir = Path("/tmp/test_base_run_coverage")
code = corpus_base.joinpath("test_file_segmentation.cpp").read_text()
code = corpus_base.joinpath("test_file_segmentation_2.cpp").read_text()
cmd = ProfileRunParams(dir=dir, main="main.cpp", files={"main.cpp": code})

cmd.run()
@@ -312,22 +318,61 @@ def test_file_segmentation():

df = pd.read_sql(select(cov.CovSegment), session.get_bind())
df["Text"] = df.apply(
lambda row: cov.extract_text(
lines,
start=(row["StartLine"], row["StartCol"]),
end=(row["EndLine"], row["EndCol"]),
),
lambda row: cleanup_test_code(
cov.extract_text(
lines,
start=(row["StartLine"], row["StartCol"]),
end=(row["EndLine"], row["EndCol"]),
)),
axis=1,
)

table = dataframe_to_rich_table(df)
table.box = rich.box.ASCII2
table.show_lines = True
Path("/tmp/regions.txt").write_text(render_rich(table, color=False))

segment_df = pd.read_sql(select(cov.CovSegmentFlat), session.get_bind())
segment_df["Text"] = segment_df["Line"].map(lambda it: lines[it - 1])
table = dataframe_to_rich_table(segment_df)
table.box = rich.box.ASCII2
table.show_lines = True
Path("/tmp/segments.txt").write_text(render_rich(table, color=False))

assert_frame(df, [
dict(
StartLine=1,
EndLine=1,
SegmentIndex=0,
Text="{}",
),
dict(
StartLine=3,
EndLine=5,
SegmentIndex=1,
Id=2,
Text="{ if (true || false) { action(); } }",
),
dict(
StartLine=4,
EndLine=4,
SegmentIndex=2,
Text="true",
StartCol=9,
EndCol=13,
NestedIn=2,
),
dict(
StartLine=4,
EndLine=4,
SegmentIndex=3,
Text="false",
StartCol=17,
EndCol=22,
NestedIn=2,
),
dict(
StartLine=4,
SegmentIndex=4,
Text="{ action(); }",
NestedIn=2,
),
])
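
Note (illustrative, not part of the commit): the NestedIn=2 values asserted above all point at the function-body segment with Id=2, so the nesting can also be inspected by joining the dataframe back onto itself. A standalone pandas sketch using the same rows (the child Ids are invented for the example):

import pandas as pd

segments = pd.DataFrame([
    dict(Id=3, SegmentIndex=2, Text="true", NestedIn=2),
    dict(Id=4, SegmentIndex=3, Text="false", NestedIn=2),
    dict(Id=5, SegmentIndex=4, Text="{ action(); }", NestedIn=2),
])
parents = pd.DataFrame([
    dict(NestedIn=2, ParentText="{ if (true || false) { action(); } }"),
])
# Attach the text of the enclosing segment to every nested segment.
print(segments.merge(parents, on="NestedIn", how="left"))
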
