
Incorporate PerfAnalyzer #111

Open · wants to merge 51 commits into main
6a3cde8
Add cloverleaf test run parser
Jul 3, 2024
24ebe6b
Add git info in clover parse
Jul 3, 2024
1580cfa
Clover run commit history parsed and visualized in jupyter
sayefsakin Jul 5, 2024
2d82182
Updted clover perf history chart
sayefsakin Jul 8, 2024
4bce3ef
Added clover profiler graph directory through DSI SQLite
sayefsakin Jul 9, 2024
4d2d6ca
added code sensing for pragma and define
sayefsakin Jul 16, 2024
b0219ad
Perf check Workflow with action files
sayefsakin Jul 19, 2024
953b0fd
Conda fix in perf_check workflow
sayefsakin Jul 19, 2024
6b97957
Conda fix for workflow
sayefsakin Jul 19, 2024
096737c
Again Conda testing
sayefsakin Jul 19, 2024
00b3601
Removed miniconda
sayefsakin Jul 19, 2024
3f48de3
Testing on mac image
sayefsakin Jul 19, 2024
cf797a0
Trying miniconda on mac
sayefsakin Jul 19, 2024
2e7e5ce
Trying setup conda v3
sayefsakin Jul 19, 2024
a3636f4
Test clover repo on action
sayefsakin Jul 19, 2024
d00e03d
Action multi checkout check
sayefsakin Jul 19, 2024
74ec7f6
Testing multi checkout workflow
sayefsakin Jul 19, 2024
d955e7a
add linux fortran
sayefsakin Jul 19, 2024
b9f0eef
Testing cloverleaf workflow
sayefsakin Jul 19, 2024
0ce020f
Testing cloverleaf
sayefsakin Jul 19, 2024
c2f6744
Install openmpi in workflow
sayefsakin Jul 19, 2024
b06baf6
Two nodes test
sayefsakin Jul 19, 2024
e1d41da
Test artifact sharing
sayefsakin Jul 19, 2024
7e163eb
Test artifact output
sayefsakin Jul 19, 2024
2043486
Test artifact duplicates
sayefsakin Jul 19, 2024
c3bad13
Test artifact content
sayefsakin Jul 19, 2024
ca82e15
Test artifact fix
sayefsakin Jul 19, 2024
96bd3b6
fly server added for perf analyzer
sayefsakin Jul 23, 2024
2902a6b
Moving to the gate
sayefsakin Jul 25, 2024
264f518
First rebasing
sayefsakin Jul 25, 2024
d34fdf4
all untrakced changes added
sayefsakin Jul 25, 2024
db0e13c
Commit table added inside perf analyzer
sayefsakin Aug 1, 2024
9030740
fly server fix
sayefsakin Aug 1, 2024
fd683ee
commit sleection table with button
sayefsakin Aug 1, 2024
5471bf6
git runner script with clover example
sayefsakin Aug 2, 2024
0bf8cd1
perf interactions
sayefsakin Aug 5, 2024
3572b22
Auto perf chart update from selected commits
sayefsakin Aug 6, 2024
b42c4a6
perf analyzer source code viewer added
sayefsakin Aug 9, 2024
3b07fbd
perf analyzer diff added
sayefsakin Aug 13, 2024
d0403b1
perf analyzer multi var diff
sayefsakin Aug 13, 2024
77bb7a6
perf analyzer metric filter added
sayefsakin Aug 13, 2024
a3b1f47
perf analyzer mpi parsing added
sayefsakin Aug 14, 2024
ed02a75
perf_analyzer moved to a new folder
sayefsakin Aug 14, 2024
83cce21
perf analyzer custom input in runner script
sayefsakin Aug 14, 2024
78c92ed
perf analyzer unused file removed
sayefsakin Aug 14, 2024
9dcd90f
perf analyzer moved to tools folder
sayefsakin Aug 14, 2024
dc41423
highligh js file removed
sayefsakin Aug 16, 2024
32e3411
readme file added for perf analyzer
sayefsakin Aug 16, 2024
2e60dda
Added readme description for perf analyzer
sayefsakin Aug 16, 2024
1b84f2a
requirement file added for perf analyzer
sayefsakin Aug 16, 2024
8ead224
updated perf analyzer jupyter file
sayefsakin Sep 6, 2024
159 changes: 159 additions & 0 deletions .github/workflows/perf_check.yml
@@ -0,0 +1,159 @@
# This is a basic workflow to help you get started with Actions

name: Code Performance Analyzer

# Controls when the workflow will run
on:
# Triggers the workflow on push or pull request events but only for the "main" branch
# push:
# branches: [ "main" ]
# pull_request:
# branches: [ "main" ]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
# inputs:
# hashes:
# required: true
# type: choice
# description: Make a choice
# options:
# - foo
# - bar
# - baz

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
define-matrix:
runs-on: ubuntu-latest

outputs:
hashes: ${{ steps.hashes.outputs.hashes }}

steps:
- name: Define Hashes
id: hashes
run: |
echo 'hashes=["158e23d08f73d36f71e144851451955b3ae02dff", "89cc919b28f687a25d30b44ddf547201da930c14"]' >> "$GITHUB_OUTPUT"

produce-performance-artifacts:
runs-on: ubuntu-latest
defaults:
run:
shell: bash -el {0}
needs: define-matrix
strategy:
matrix:
hashes: ${{ fromJSON(needs.define-matrix.outputs.hashes) }}

steps:
- uses: actions/checkout@v4
with:
repository: UK-MAC/CloverLeaf_ref
ref: ${{ matrix.hashes }}

- uses: fortran-lang/[email protected]

- name: Install OpenMPI
run: sudo apt install -y openmpi-bin libopenmpi-dev

# check all the requirements and their versions
- name: Check installed dependencies
run: |
gcc --version
gfortran --version
mpirun --version
lscpu | grep -E '^Thread|^Core|^Socket|^CPU\('

- name: Compile cloverleaf
run: |
make COMPILER=GNU

- name: Run cloverleaf
run: |
mpirun -np 2 clover_leaf
mv clover.out clover_output_${{ matrix.hashes }}.out

- name: Produce Artifact
uses: actions/upload-artifact@v4
with:
name: clover_artifact_${{ matrix.hashes }}
path: clover_output_${{ matrix.hashes }}.out

consume-artifacts:
runs-on: macos-latest
needs:
- produce-performance-artifacts

steps:
- name: Download all workflow run artifacts
uses: actions/download-artifact@v4
with:
path: clover_artifact
pattern: clover_artifact_*
merge-multiple: true

- name: Check artifact files
run: |
ls -R clover_artifact
cd clover_artifact
tail -n 10 clover_output_*
# # This workflow contains a single job called "build"
# build:
# # The type of runner that the job will run on
# runs-on: macos-latest

# defaults:
# run:
# shell: bash -el {0}

# # Steps represent a sequence of tasks that will be executed as part of the job
# steps:
# # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
# - uses: actions/checkout@v4
# # with:
# # repository: UK-MAC/CloverLeaf_ref
# # ref: 0ddf495cf21cc59f84e274617522a1383e2c328c

# # - uses: actions/setup-python@v5
# # with:
# # python-version: '3.10'

# # - name: Add conda to system path
# # run: |
# # # $CONDA is an environment variable pointing to the root of the miniconda directory
# # echo $CONDA/bin >> $GITHUB_PATH



# - uses: conda-incubator/setup-miniconda@v3
# with:
# channels: defaults,conda-forge,spyder-ide
# activate-environment: cdsi
# environment-file: examples/cloverleaf/environment.yml
# auto-activate-base: false

# # - uses: s-weigand/[email protected]

# # - name: Install dependencies
# # run: |
# # cd examples/cloverleaf
# # conda env create --file environment.yml --name cdsi
# # conda activate cdsi

# # check all the requirements and their versions
# - name: Check installed dependencies
# run: |
# python3 --version
# gcc --version
# conda --version
# gfortran --version
# conda info
# conda list

# # Runs a set of commands using the runners shell
# - name: Run a multi-line script
# run: |
# echo Add other actions to build,
# echo test, and deploy your project.
# ls .
32 changes: 32 additions & 0 deletions tools/perf_analyzer/Readme.md
@@ -0,0 +1,32 @@
## PerfAnalyzer

A tool to analyze software performance alongside code history. It is built on top of the DSI SQLite plugin.

Run `fly_server.py`; the dashboard can then be accessed at `http://127.0.0.1:8050/`.
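For orientation, a minimal sketch of what such an entry point might look like, assuming the dashboard is built with Plotly Dash (its default port is 8050); this snippet is illustrative and not the actual `fly_server.py`:

```python
# Hypothetical, minimal Dash entry point; the real fly_server.py wires in the
# git history graph, performance charts, and commit table described below.
from dash import Dash, html

app = Dash(__name__)
app.layout = html.Div([html.H1("PerfAnalyzer dashboard placeholder")])

if __name__ == "__main__":
    app.run(debug=True)  # serves on http://127.0.0.1:8050/ by default
```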

##### TODO: add a requirement file

Update `runner_script.sh` to compile the code, copy the input file, and run the program.

Update the `parse_clover_output_file` function in `parse_clover_output.py` to parse your specific output file; it should return a dictionary containing the contents of the parsed output.
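As a hedged illustration only (the existing `parse_clover_output.py` and the exact `clover.out` layout are not shown in this diff), a replacement parser of this shape might look like:

```python
import re

def parse_clover_output_file(path):
    """Hypothetical sketch: pull simple 'name value' lines out of a
    CloverLeaf-style output file and return them as a dictionary."""
    results = {}
    with open(path, "r") as fh:
        for line in fh:
            # e.g. "Wall clock 12.34"; adjust the pattern to the real layout.
            match = re.match(r"\s*([A-Za-z_ ]+?)\s+([-+0-9.eE]+)\s*$", line)
            if match:
                key = match.group(1).strip().lower().replace(" ", "_")
                results[key] = float(match.group(2))
    return results
```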

#### The features available in the dashboard

PerfAnalyzer is a dashboard-based visualizer for analyzing performance across git commit history using different performance metrics. It provides the following features:

- Git history graph
  - Ordered by commit date
  - Filter by git branch
  - Select a subset of git commits
  - Show commit details such as message, committer name, date, and hash
- Performance metric line chart
  - Filter by different metrics
  - Show details on hover
- Commit table
  - Search and filter by date, hash, and message
  - Execute the `runner_script` on a selected commit
  - Show the difference between two commits (using git diff; see the sketch after this list)
- Variable search
  - Use any regex or string to search
  - Show a table of matched variables and files
  - Show file content
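To make the commit-diff feature above concrete, here is a hedged sketch using GitPython (the same library `code_sensing.py` imports); the function name `show_commit_diff` is illustrative and not part of the tool:

```python
import git

def show_commit_diff(repo_path, hash_a, hash_b):
    """Illustrative only: return the textual `git diff` between two commits,
    which is the operation the dashboard's diff view performs."""
    repo = git.Repo(repo_path)
    return repo.git.diff(hash_a, hash_b)
```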
64 changes: 64 additions & 0 deletions tools/perf_analyzer/auto_perf_check.sh
@@ -0,0 +1,64 @@
#! /bin/bash

# make sure that cdsi environment is activated
if [[ $CONDA_DEFAULT_ENV != 'cdsi' ]]; then
    echo "Activate the conda cdsi environment first."
    exit 1
fi
if [ -z "${SOURCE_BASE_DIRECTORY+x}" ]; then
    echo "SOURCE_BASE_DIRECTORY is unset"
    exit 1
else
    echo "SOURCE_BASE_DIRECTORY is set to '$SOURCE_BASE_DIRECTORY'"
fi

# SOURCE_BASE_DIRECTORY="/Users/ssakin/projects/CloverLeaf/CloverLeaf_ref"
MPI_THREADS=4
export CHECK_PREV_COMMITS=15
export OMP_NUM_THREADS=4
base_directory=$(pwd)

run_and_check_commit() {
echo "current commit hash $1"

cd $SOURCE_BASE_DIRECTORY
git checkout $1
make clean
make COMPILER=GNU
echo "================================ Compile Done ================================ "

echo "============================= Running CloverLeaf ============================= "
mpirun -np $MPI_THREADS clover_leaf
cp clover.out $base_directory"/clover_output/clover_$1.out"
echo "======================= CloverLeaf Executed for has $1 ======================= "

echo "=========================== Running output parser ============================ "
cd $base_directory
python3 parse_clover_output.py --testname random_test --gitdir $SOURCE_BASE_DIRECTORY
echo "============================ Output CSV updated ============================== "
}

track_variables() {
echo "current commit hash $1"

cd $SOURCE_BASE_DIRECTORY
git checkout $1

echo "=========================== Running code sensing ============================ "
cd $base_directory
python3 code_sensing.py --testname random_test --gitdir $SOURCE_BASE_DIRECTORY
echo "============================ Output CSV updated ============================== "
}

cd $SOURCE_BASE_DIRECTORY
prev_hash=( $(git log master -n "$CHECK_PREV_COMMITS" --format=format:%h) )

for c_hash in "${prev_hash[@]}"
do
# run_and_check_commit $c_hash
track_variables $c_hash
done

cd $SOURCE_BASE_DIRECTORY
git checkout master
echo "=========================== Auto Perf Script Completed ============================ "
90 changes: 90 additions & 0 deletions tools/perf_analyzer/code_sensing.py
@@ -0,0 +1,90 @@
#!/usr/bin/env python3

import argparse
import pickle
import sys
import glob
import re
import git

def recursive_c_directive_match(re_list, search_file_list, cur_dir):
""" The data is parsed from all of the files in the current directory """
occurance = dict()
for code_files in glob.iglob('**', root_dir=cur_dir, recursive=True):
for f_type in search_file_list:
if re.search(f_type + '$', code_files):
with open(cur_dir + "/" + code_files, 'r') as cf:
line_number = 1
for line in cf:
for each_re in re_list:
line_match = re.compile(r"\s*[#]" + each_re + r"\s+(\w+)[\t\s]+(.*)\s*(\r\n|\r|\n)").match(line)
if line_match is not None:
c_line = "#" + each_re + " " + line_match.group(1)
second_part = line_match.group(2)
if second_part is not None and len(second_part) > 0:
c_line = c_line + " " + line_match.group(2)
c_line = c_line.rstrip()
occurance[c_line] = occurance.get(c_line, dict())
# occurance[line]["first"] = line_match.group(1)
# occurance[line]["second"] = line_match.group(2)
occurance[c_line][code_files] = occurance[c_line].get(code_files, list())
occurance[c_line][code_files].append(line_number)
line_number = line_number + 1
print("matching done")
return occurance

def recursive_customized_match(re_list, search_file_list, cur_dir):
""" The data is parsed from all of the files in the current directory """
occurance = dict()
for code_files in glob.iglob('**', root_dir=cur_dir, recursive=True):
for f_type in search_file_list:
if re.search(f_type + '$', code_files):
with open(cur_dir + "/" + code_files, 'r') as cf:
line_number = 1
for line in cf:
for each_re in re_list:
line_match = re.compile(r".*(" + each_re + r").*").match(line)
if line_match is not None:
# c_line = line_match.group(1)
c_line = line.rstrip()
occurance[c_line] = occurance.get(c_line, dict())
# occurance[line]["first"] = line_match.group(1)
# occurance[line]["second"] = line_match.group(2)
occurance[c_line][code_files] = occurance[c_line].get(code_files, list())
occurance[c_line][code_files].append(line_number)
line_number = line_number + 1
print("matching done")
return occurance

def main():
""" A testname argument is required """
parser = argparse.ArgumentParser()
parser.add_argument('--testname', help='the test name')
parser.add_argument('--gitdir', help='the git directory')
args = parser.parse_args()
# testname = "temp_test"
testname = args.testname
git_repo = args.gitdir
    if testname is None or git_repo is None:
        parser.print_help()
        sys.exit(1)

git_hash = git.Repo(git_repo).head.object.hexsha
re_list = ["pragma", "define"]
search_file_list = [r"\.c", r"\.cc"]
# occ = recursive_c_directive_match(re_list, search_file_list, git_repo)

# with open(git_hash + '.pickle', 'wb') as handle:
# pickle.dump(occ, handle, protocol=pickle.HIGHEST_PROTOCOL)

# re_list = [r"OMP PARALLEL", r"vol=0\.0", r"\w+=\d+\.\d+"]
search_file_list = [r"\.c", r"\.cc", r"\.f90"]
recursive_customized_match(re_list, search_file_list, git_repo)

# with open(git_hash + '.pickle', 'rb') as handle:
# b = pickle.load(handle)
# print(b)

if __name__ == '__main__':
main()
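
# --- Illustrative usage (hedged; not part of the original script) ---
# Example of calling the matcher directly; the checkout path is hypothetical.
#
#     occurrences = recursive_customized_match(
#         re_list=[r"OMP PARALLEL"],
#         search_file_list=[r"\.f90"],
#         cur_dir="/path/to/CloverLeaf_ref",
#     )
#
# The result maps each matched source line to {file: [line numbers]}, e.g.
#     {"!$OMP PARALLEL DO": {"hydro.f90": [12, 87]}}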
