diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index afbf3d4497..c0aef391fa 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -1,14 +1,14 @@ -# Name of Semgrep-Actions workflow +# Name of Semgrep-Actions workflow name: semgrep-scan on: # Scan changed files in PRs (diff-aware scanning): pull_request: {} - # Scan mainline branches and report all findings: + # Scan mainline branches and report all findings: push: branches: ["ci_dev", "ci_temporary", "main", "opensource_gsf"] schedule: - - cron: '0 */12 * * *' + - cron: '0 */12 * * *' defaults: run: @@ -16,7 +16,7 @@ defaults: jobs: semgrep: - + name: Scan runs-on: ubuntu-latest container: @@ -37,10 +37,10 @@ jobs: # Period (.) to run scan in the current directory to avoid blocking findings in the future use --exclude=+BLOCKING - name: Run Scan - 1 (Default Ruleset) run: semgrep --config=p/default . - + - name: Run Scan - 2 (Ruleset for Python) run: semgrep --config=p/python . - + - name: Run Scan - 3 (Ruleset for Dockerfile) run: semgrep --config=p/dockerfile . diff --git a/graphstorm-processing/tests/test_row_count_utils.py b/graphstorm-processing/tests/test_row_count_utils.py index 23e9efd6d3..fd5e3643be 100644 --- a/graphstorm-processing/tests/test_row_count_utils.py +++ b/graphstorm-processing/tests/test_row_count_utils.py @@ -233,6 +233,27 @@ def test_add_counts_to_metadata(row_counter, test_metadata): assert "row_counts" in updated_metadata["node_data"]["type2"]["feature1"] +def test_edge_data_row_counts(row_counter, test_metadata): + """Test the row counts for edge data features.""" + updated_metadata = row_counter.add_row_counts_to_metadata(test_metadata) + + # Check that edge data counts were added + edge_type = "type1:edge:type2" + edge_feature = "weight" + + # Verify row counts exist and are correct + assert "row_counts" in updated_metadata["edge_data"][edge_type][edge_feature] + assert updated_metadata["edge_data"][edge_type][edge_feature]["row_counts"] == [10, 15] + + # Verify edge data counts match edge structure counts + edge_feature_counts = updated_metadata["edge_data"][edge_type][edge_feature]["row_counts"] + edge_structure_counts = updated_metadata["edges"][edge_type]["row_counts"] + assert edge_feature_counts == edge_structure_counts + + # Test that the total number of rows is correct + assert sum(edge_feature_counts) == 25 + + def test_verify_features_and_structure_match(): """Test verification of feature and structure row counts.""" structure_meta = {"type1": {"row_counts": [10, 15], "data": ["file1.parquet", "file2.parquet"]}}