diff --git a/scripts/cxx_codegen/profdata_merger/profdata_merger.cpp b/scripts/cxx_codegen/profdata_merger/profdata_merger.cpp index 154b28ab7..ec816acd1 100644 --- a/scripts/cxx_codegen/profdata_merger/profdata_merger.cpp +++ b/scripts/cxx_codegen/profdata_merger/profdata_merger.cpp @@ -767,10 +767,10 @@ struct queries { "CovSegment", { "Id", // 1 - "StartLine", // 2 - "StartCol", // 3 - "EndLine", // 4 - "EndCol", // 5 + "LineStart", // 2 + "ColStart", // 3 + "LineEnd", // 4 + "ColEnd", // 5 "StartCount", // 6 "EndCount", // 7 "HasCount", // 8 diff --git a/scripts/cxx_codegen/profdata_merger/profdata_merger.sql b/scripts/cxx_codegen/profdata_merger/profdata_merger.sql index 588a9c5db..6dd07fad4 100644 --- a/scripts/cxx_codegen/profdata_merger/profdata_merger.sql +++ b/scripts/cxx_codegen/profdata_merger/profdata_merger.sql @@ -92,10 +92,10 @@ CREATE TABLE "CovFunctionInstantiation" ( CREATE TABLE "CovSegment" ( "Id" INTEGER NOT NULL, - "StartLine" INTEGER NOT NULL, - "StartCol" INTEGER NOT NULL, - "EndLine" INTEGER NOT NULL, - "EndCol" INTEGER NOT NULL, + "LineStart" INTEGER NOT NULL, + "ColStart" INTEGER NOT NULL, + "LineEnd" INTEGER NOT NULL, + "ColEnd" INTEGER NOT NULL, "StartCount" INTEGER NOT NULL, "EndCount" INTEGER NOT NULL, "HasCount" BOOLEAN NOT NULL, diff --git a/scripts/py_repository/py_repository/gen_coverage_cxx.py b/scripts/py_repository/py_repository/gen_coverage_cxx.py index cd83af120..aedc7d6ce 100755 --- a/scripts/py_repository/py_repository/gen_coverage_cxx.py +++ b/scripts/py_repository/py_repository/gen_coverage_cxx.py @@ -1,16 +1,17 @@ #!/usr/env/bin python -from beartype.typing import Optional, Any, List, Tuple +from beartype.typing import Optional, Any, List, Tuple, Iterable from pydantic import Field, BaseModel -from sqlalchemy import create_engine, Column +from sqlalchemy import create_engine, Column, select, Select from sqlalchemy import Enum as SqlEnum from sqlalchemy.schema import CreateTable -from sqlalchemy.orm import 
@beartype
class CoverageSegmentTree:
    """Binary tree over coverage segments answering "which segments cover
    this (line, col) point" queries.

    The segments are sorted by their start position
    ``(LineStart, ColStart)``. Each node covers a contiguous index range of
    that sorted list and records the smallest start position and the largest
    end position ``(LineEnd, ColEnd)`` found in that range, so :meth:`query`
    can skip subtrees that cannot contain the point. The final decision for
    an individual segment is delegated to ``segment.intersects(line, col)``.
    """

    def __init__(self, segments: Iterable[CovSegment]):
        """Sort `segments` by start position and build the tree over them.

        An empty input leaves ``self.root`` as ``None``; queries on such a
        tree return an empty list.
        """
        self.root = None
        self.segments = sorted(segments, key=lambda x: (x.LineStart, x.ColStart))
        if self.segments:
            self.root = self.build_tree(0, len(self.segments) - 1)

    @beartype
    class Node:
        """Tree node covering ``segments[start:end + 1]`` of the sorted list."""

        def __init__(self, start: int, end: int, segments: Iterable[CovSegment]):
            # `start` / `end` are inclusive INDICES into the owning tree's
            # sorted segment list -- they are not source line numbers.
            self.start = start
            self.end = end
            self.segments = segments
            self.left: Optional['CoverageSegmentTree.Node'] = None
            self.right: Optional['CoverageSegmentTree.Node'] = None
            # Position bounds as (line, col) tuples. They are filled in by
            # `build_tree`; `None` means "unknown", in which case `query`
            # does not prune on that bound.
            self.min_start = None
            self.max_end = None

    def build_tree(self, start: int, end: int) -> Optional[Node]:
        """Recursively build the subtree for sorted indices `start`..`end`.

        Returns ``None`` for an empty range (``start > end``), a leaf holding
        exactly one segment when ``start == end``, and otherwise an inner
        node whose children split the range in half.
        """
        if start > end:
            return None

        if start == end:
            seg = self.segments[start]
            leaf = self.Node(start, end, [seg])
            leaf.min_start = (seg.LineStart, seg.ColStart)
            leaf.max_end = (seg.LineEnd, seg.ColEnd)
            return leaf

        mid = (start + end) // 2
        node = self.Node(start, end, self.segments[start:end + 1])
        # start < end here, so both halves are non-empty and both children
        # are real nodes.
        node.left = self.build_tree(start, mid)
        node.right = self.build_tree(mid + 1, end)
        # The list is sorted by start position, so the first segment of the
        # range has the smallest start; the largest end has to be propagated
        # up from the children.
        first = self.segments[start]
        node.min_start = (first.LineStart, first.ColStart)
        node.max_end = max(node.left.max_end, node.right.max_end)
        return node

    def query(self,
              line: int,
              col: int,
              node: Optional[Node] = None) -> Iterable[CovSegment]:
        """Return the segments under `node` that cover position (`line`, `col`).

        When `node` is ``None`` the search starts at the tree root. Each
        matching segment is reported once, in sorted (start position) order.
        """
        if node is None:
            node = self.root
        if node is None:
            return []

        # Tuples compare lexicographically: line first, then column. A
        # segment whose `intersects` accepts the point has
        # start <= point <= end in this ordering, so the two checks below
        # never discard a real match.
        point = (line, col)
        if node.min_start is not None and point < node.min_start:
            return []
        if node.max_end is not None and node.max_end < point:
            return []

        # Segments are reported at leaves only. Inner nodes also keep a copy
        # of their whole range, so collecting there as well would return the
        # same segment once per tree level.
        if node.left is None and node.right is None:
            return [seg for seg in node.segments if seg.intersects(line, col)]

        result = []
        for child in (node.left, node.right):
            if child is not None:
                result.extend(self.query(line, col, child))

        return result
@beartype
def add_cov_segment_text(df: pd.DataFrame, lines: List[str]):
    """Add a ``Text`` column with the source text covered by each segment row.

    For every row of `df` the span from ``(LineStart, ColStart)`` to
    ``(LineEnd, ColEnd)`` is cut out of `lines` with ``cov.extract_text`` and
    passed through ``cleanup_test_code``. `df` is modified in place and
    nothing is returned.
    """

    def segment_text(row):
        # Span boundaries of the current row as (line, column) pairs.
        span_begin = (row["LineStart"], row["ColStart"])
        span_end = (row["LineEnd"], row["ColEnd"])
        raw_text = cov.extract_text(lines, start=span_begin, end=span_end)
        return cleanup_test_code(raw_text)

    # axis=1 hands each row to the helper.
    df["Text"] = df.apply(segment_text, axis=1)
tmp: + dir = Path(tmp) + dir = Path("/tmp/test_base_run_coverage") + code = corpus_base.joinpath("test_file_segmentation_1.cpp").read_text() + cmd = ProfileRunParams(dir=dir, main="main.cpp", files={"main.cpp": code}) + cmd.run() + + session = open_sqlite_session(cmd.get_sqlite(), cov.CoverageSchema) + main_cov = cov.get_coverage_of(session, cmd.get_code("main.cpp")) + lines = code.split("\n") + + segtree = cov.CoverageSegmentTree(it[0] for it in session.execute(main_cov)) + df = pd.read_sql(main_cov, session.get_bind()) + add_cov_segment_text(df, lines) + + # print(render_rich(dataframe_to_rich_table(df))) + + # Coverage segments only overlay executable blocks and do not + # account for extraneous elements such as function headers etc. + assert segtree.query(line=1, col=15) + assert not segtree.query(line=1, col=14) + assert_frame(df[df["LineStart"] == 1], [ + dict(IsLeaf=True, Text="{}", ColStart=15, ColEnd=17), + ]) + + +def test_file_segmentation_2(): with TemporaryDirectory() as tmp: dir = Path(tmp) dir = Path("/tmp/test_base_run_coverage") @@ -317,15 +357,7 @@ def test_file_segmentation(): lines = code.split("\n") df = pd.read_sql(select(cov.CovSegment), session.get_bind()) - df["Text"] = df.apply( - lambda row: cleanup_test_code( - cov.extract_text( - lines, - start=(row["StartLine"], row["StartCol"]), - end=(row["EndLine"], row["EndCol"]), - )), - axis=1, - ) + add_cov_segment_text(df, lines) table = dataframe_to_rich_table(df) table.show_lines = True @@ -339,42 +371,42 @@ def test_file_segmentation(): assert_frame(df, [ dict( - StartLine=1, - EndLine=1, + LineStart=1, + LineEnd=1, SegmentIndex=0, Text="{}", IsLeaf=True, ), dict( - StartLine=3, - EndLine=5, + LineStart=3, + LineEnd=5, SegmentIndex=1, Id=2, Text="{ if (true || false) { action(); } }", IsLeaf=False, ), dict( - StartLine=4, - EndLine=4, + LineStart=4, + LineEnd=4, SegmentIndex=2, Text="true", - StartCol=9, - EndCol=13, + ColStart=9, + ColEnd=13, NestedIn=2, IsLeaf=True, ), dict( - StartLine=4, 
- EndLine=4, + LineStart=4, + LineEnd=4, SegmentIndex=3, Text="false", - StartCol=17, - EndCol=22, + ColStart=17, + ColEnd=22, NestedIn=2, IsLeaf=True, ), dict( - StartLine=4, + LineStart=4, SegmentIndex=4, Text="{ action(); }", NestedIn=2,