-
Notifications
You must be signed in to change notification settings - Fork 87
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
392 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,9 @@ on: | |
paths: | ||
- ".github/workflows/update_test_file_ratings.yml" | ||
- "torchci/scripts/calculate_file_test_rating.py" | ||
- "torchci/scripts/test_calculate_file_test_rating.py" | ||
- "torchci/scripts/td_heuristic_historical_edited_files.py" | ||
- "torchci/scripts/td_heuristic_profiling.py" | ||
- "torchci/scripts/get_merge_base_info.py" | ||
schedule: | ||
- cron: 5 11 * * * # At 11:05 UTC every day or about 4am PT | ||
|
@@ -46,6 +49,10 @@ jobs: | |
- name: Generate file test ratings | ||
run: | | ||
python3 test-infra/torchci/scripts/calculate_file_test_rating.py | ||
python3 test-infra/torchci/scripts/td_heuristic_historical_edited_files.py | ||
# Do not run this one, it won't change | ||
# python3 test-infra/torchci/scripts/td_heuristic_profiling.py | ||
env: | ||
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }} | ||
|
||
|
@@ -76,3 +83,17 @@ jobs: | |
user_email: "[email protected]" | ||
user_name: "Pytorch Test Infra" | ||
commit_message: "Updating file to test class correlations" | ||
|
||
- name: Push historical edited files heuristic to test-infra repository | ||
if: github.event_name != 'pull_request' | ||
uses: dmnemec/copy_file_to_another_repo_action@eebb594efdf52bc12e1b461988d7254322dac131 | ||
env: | ||
API_TOKEN_GITHUB: ${{ secrets.GITHUB_TOKEN }} | ||
with: | ||
source_file: "td_heuristic_historical_edited_files.json" | ||
destination_repo: "pytorch/test-infra" | ||
destination_folder: "stats" | ||
destination_branch: generated-stats | ||
user_email: "[email protected]" | ||
user_name: "Pytorch Test Infra" | ||
commit_message: "Updating TD heuristic: historical edited files" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -125,3 +125,6 @@ docs/_build/ | |
|
||
# Pyenv | ||
.python-version | ||
|
||
# torchci caching utils | ||
.torchci_python_utils_cache |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import json | ||
from collections import defaultdict | ||
from typing import Dict | ||
|
||
from utils_td_heuristics import ( | ||
cache_json, | ||
evaluate, | ||
get_all_invoking_files, | ||
get_filtered_failed_tests, | ||
get_merge_bases_dict, | ||
list_past_year_shas, | ||
query_rockset, | ||
) | ||
|
||
CHANGED_FILES_QUERY = """ | ||
select | ||
sha, | ||
changed_files | ||
from | ||
commons.merge_bases | ||
where | ||
ARRAY_CONTAINS(SPLIT(:shas, ','), sha) | ||
""" | ||
|
||
|
||
@cache_json
def gen_correlation_dict() -> Dict[str, Dict[str, float]]:
    """Score how often each changed file is edited together with a test file.

    For every commit returned by ``CHANGED_FILES_QUERY`` for the SHAs from
    ``list_past_year_shas()``, each changed path of the form
    ``test/<name>.py`` whose ``<name>`` is in ``get_all_invoking_files()``
    is treated as an edited test file.  Every file changed in that commit
    (including the test file itself) then gains ``1 / len(changed_files)``
    towards that test, so commits touching many files contribute less per
    file.

    Returns:
        Nested mapping ``changed file path -> test name -> score``.  The
        result is cached on disk by ``cache_json``.
    """
    shas = list_past_year_shas()

    # The SHAs are passed to the query as a single comma-joined string, so
    # they are sent in fixed-size batches (presumably to keep each query
    # parameter reasonably small -- TODO confirm the actual limit).
    interval = 500
    commits = []
    for i in range(0, len(shas), interval):
        commits.extend(
            query_rockset(
                CHANGED_FILES_QUERY,
                params={"shas": ",".join(shas[i : i + interval])},
                use_cache=True,
            )
        )

    invoking_files = get_all_invoking_files()

    # A changed path only names a test file when it looks like
    # "test/<name>.py"; check that explicitly rather than blindly slicing
    # off 5 leading and 3 trailing characters, which could map unrelated
    # paths (e.g. "tool/<name>.js") onto a test name.
    test_prefix = "test/"
    test_suffix = ".py"

    d = defaultdict(lambda: defaultdict(float))
    for commit in commits:
        changed_files = commit["changed_files"]

        test_files = []
        for path in changed_files:
            if not (path.startswith(test_prefix) and path.endswith(test_suffix)):
                continue
            test_name = path[len(test_prefix) : -len(test_suffix)]
            if test_name in invoking_files:
                test_files.append(test_name)

        if not test_files:
            continue

        # changed_files is non-empty here because it yielded a test file.
        weight = 1 / len(changed_files)
        for test_file in test_files:
            for file in changed_files:
                d[file][test_file] += weight
    return d
|
||
|
||
if __name__ == "__main__":
    # Build (or load from cache) the changed-file -> test score table.
    heuristic_scores = gen_correlation_dict()
    merge_base_info = get_merge_bases_dict()
    failed_tests = get_filtered_failed_tests()

    # Evaluate the heuristic against the filtered failed tests.
    evaluate(failed_tests, merge_base_info, heuristic_scores)

    # Write the table out as sorted, indented JSON.
    with open("td_heuristic_historical_edited_files.json", mode="w") as out_file:
        json.dump(heuristic_scores, out_file, sort_keys=True, indent=2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import json | ||
|
||
import requests | ||
from utils_td_heuristics import evaluate, get_filtered_failed_tests, get_merge_bases_dict | ||
|
||
|
||
def get_profiling_dict():
    """Download and parse the profiling heuristic JSON.

    Returns:
        The decoded JSON document fetched from the ``generated-stats``
        branch of pytorch/test-infra.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond in time.
    """
    # The dict should be generated elsewhere and this function modified to
    # retrieve the data.
    url = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/td_heuristic_profiling.json"
    # requests has no default timeout, so set one to avoid hanging forever.
    response = requests.get(url, timeout=60)
    # Fail on HTTP errors instead of trying to JSON-decode an error page.
    response.raise_for_status()
    return json.loads(response.text)
|
||
|
||
def main() -> None:
    """Evaluate the profiling heuristic and write its table to a JSON file."""
    profiling_scores = get_profiling_dict()
    merge_base_info = get_merge_bases_dict()
    failed_tests = get_filtered_failed_tests()

    evaluate(failed_tests, merge_base_info, profiling_scores)

    # Write the table out as sorted, indented JSON.
    with open("td_heuristic_profiling.json", mode="w") as out_file:
        json.dump(profiling_scores, out_file, sort_keys=True, indent=2)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import datetime | ||
from hashlib import sha256 | ||
import json | ||
import os | ||
import pathlib | ||
import subprocess | ||
from typing import List, Union | ||
|
||
|
||
FILE_CACHE_LIFESPAN_SECONDS = 60 * 60 * 24 # 1 day | ||
REPO_ROOT = pathlib.Path(__file__).parent.parent.parent | ||
CACHE_FOLDER = REPO_ROOT / ".torchci_python_utils_cache" | ||
|
||
|
||
def js_beautify(obj):
    """Serialize ``obj`` to JSON and pretty-print it with jsbeautifier.

    Like json.dumps with indent=2, but only at the first level.  Nice for
    dictionaries of str -> really long list.
    """
    # Imported inside the function, so jsbeautifier is only required when
    # this helper is actually called.
    import jsbeautifier

    options = jsbeautifier.default_options()
    options.indent_size = 2
    compact_json = json.dumps(obj)
    return jsbeautifier.beautify(compact_json, options)
|
||
|
||
def run_command(command: Union[str, List[str]]) -> str:
    """Run ``command`` in the pytorch checkout and return its stdout.

    Assumes test-infra and pytorch are checked out in the same parent
    folder.

    Args:
        command: Argument list, or a string that is split on single spaces.
            Because the split is purely on " ", quoted arguments containing
            spaces are not supported in string form.

    Returns:
        The command's standard output, decoded as UTF-8 and stripped of
        leading and trailing whitespace.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    argv = command.split(" ") if isinstance(command, str) else command
    pytorch_dir = REPO_ROOT / ".." / "pytorch"
    raw_output = subprocess.check_output(argv, cwd=pytorch_dir)
    return raw_output.decode("utf-8").strip()
|
||
|
||
def cache_json(func):
    """Decorator that caches a function's result in a JSON file between runs.

    Requires that both the inputs and the output be JSON serializable.  The
    cache file lives in ``CACHE_FOLDER`` and is named after the function
    plus SHA-256 digests of its JSON-encoded positional and keyword
    arguments.  A cached file is reused while its modification time is
    less than ``FILE_CACHE_LIFESPAN_SECONDS`` old.

    Note that a cache hit returns the JSON-decoded value, while a cache
    miss returns the function's own return value unchanged.
    """
    from functools import wraps

    # Create the cache folder as soon as a function is decorated.
    os.makedirs(CACHE_FOLDER, exist_ok=True)

    @wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        # Ensure the folder still exists at call time as well.
        os.makedirs(CACHE_FOLDER, exist_ok=True)
        args_key = sha256(json.dumps(args).encode("utf-8")).hexdigest()
        kwargs_key = sha256(
            json.dumps(kwargs, sort_keys=True).encode("utf-8")
        ).hexdigest()
        file_name = f"{func.__name__} args={args_key} kwargs={kwargs_key}.json"
        cache_file = CACHE_FOLDER / file_name

        if os.path.exists(cache_file):
            now = datetime.datetime.now()
            mtime = datetime.datetime.fromtimestamp(cache_file.stat().st_mtime)
            if (now - mtime).total_seconds() < FILE_CACHE_LIFESPAN_SECONDS:
                # Use a context manager so the file handle is closed.
                with open(cache_file) as f:
                    return json.load(f)

        res = func(*args, **kwargs)
        # Serialize before opening the file, so a non-serializable result
        # does not leave an empty cache file that a later call would then
        # fail to parse.
        serialized = json.dumps(res)
        with open(cache_file, "w") as f:
            f.write(serialized)
        return res

    return wrapper
Oops, something went wrong.