-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding Script to Generate a Summary of Docker Images Used by Workflows #410
Open
bshifaw
wants to merge
19
commits into
main
Choose a base branch
from
bs_docker_usage_sum
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
e2226cd
add docker_usage_sum.py
b9165b7
updates made to docker_usage_sum.py
387fe74
moved collect_docker_in_system.sh to scripts/docker folder
a9e24fc
adding notes to docker_usage_sum.py
d027cf0
Refactoring, switched tsv columns, logic to look for latest tag for gcr
4995a57
Refactoring, divide up some functions
e0d745b
Note about having gcloud installed
b4eb61c
rm collect_docker_in_system.sh
0a9bdac
Added help message
76a86ea
make backup of old tsv instead of deleting it
c94e680
updated regex pattern
17043ef
changed way of writing to tsv file
77dc9ce
Added py script to create markdown file from docker usage tsv
b9fed9c
edited docker_usage_sum.py so that it can be executed from any dir
fd1db31
renamed docker_usage_md.py to generate_docker_usage_md.py. added gene…
5a2c250
edited markdown title, removed links in header because doesn't work i…
aa50c39
refactored main function
85621db
fix docker line
532e977
Merge branch 'main' into bs_docker_usage_sum
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next
Next commit
add docker_usage_sum.py
- Loading branch information
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
import os | ||
import re | ||
import urllib.request | ||
from urllib.error import HTTPError, URLError | ||
import json | ||
|
||
|
||
# A script to collect which Docker images are used by the WDL workflows and the latest tag available for each | ||
|
||
def main():
    """
    Collect the docker images referenced by the WDL files and write a summary
    to dockers.in_use.tsv in the current working directory.

    WDL files are searched under "../../wdl" relative to this script. Each
    row of the TSV holds: image name, tag used, latest tag found, line number
    and the path of the WDL that references the image. Any existing
    dockers.in_use.tsv is overwritten.
    """
    script_dir = os.path.abspath(os.path.dirname(__file__))
    wdls_dir = os.path.join(script_dir, "../../wdl")
    out_tsv = "dockers.in_use.tsv"

    print("COLLECTING DOCKERS IN USE...")

    # Compiled once for all files: a candidate line mentions "docker"
    # followed somewhere by a double quote.
    pattern = re.compile(r'.*docker.*"')
    global_docker_info = []

    for wdl_path in get_wdl_files(dir_to_wdls=wdls_dir):
        # Single pass over the file, keeping 1-based line numbers.
        with open(wdl_path, "r") as wdl_file:
            matched_lines = [
                (line_number, line.strip())
                for line_number, line in enumerate(wdl_file, start=1)
                if pattern.search(line)
            ]
        if not matched_lines:
            continue

        docker_info = get_docker_info_from_string(
            wdl_lines=matched_lines, wdl_name=wdl_path
        )
        # A WDL can have candidate lines but no extractable image; skipping
        # it avoids writing an empty row into the TSV.
        if docker_info:
            global_docker_info.append(sorted(docker_info))

    # "w" truncates any previous report, so no separate removal is needed.
    with open(out_tsv, "w") as tsv_file:
        tsv_file.write("name\tused_tag\tlatest_tag\tline\twdl\n")
        for wdl_rows in sorted(global_docker_info):
            tsv_file.write("\n".join(wdl_rows) + "\n")

    print("DONE. PLEASE CHECKOUT TSV FILE: dockers.in_use.tsv")
|
||
|
||
def get_wdl_files(dir_to_wdls: str) -> list:
    """
    Returns a list of wdl files found under a directory (searched recursively)
    @param dir_to_wdls: directory to walk
    @return: sorted list of paths whose file name ends with ".wdl"; empty when
             nothing matches or the directory does not exist
    """
    # os.walk yields entries in a filesystem-dependent order; sorting keeps
    # the result stable from run to run.
    return sorted(
        os.path.join(root, filename)
        for root, _, files in os.walk(dir_to_wdls)
        for filename in files
        if filename.endswith(".wdl")
    )
|
||
|
||
def get_docker_info_from_string(wdl_lines: list, wdl_name: str) -> list:
    """
    Returns a list of docker info
    @param wdl_lines: list of (line_number, line_content) tuples
    @param wdl_name: name/path of the wdl the lines come from
    @return: one tab-separated string per image found, in the form
             "<image>\\t<used_tag>\\t<latest_tag>\\t<line_number>\\t<wdl_name>";
             "NA" stands in for a tag that could not be determined
    """
    docker_detail = []

    for line_num, line_content in wdl_lines:
        docker_names = re.findall(r'docker.*"(\S*?)"', line_content)
        if not docker_names:
            continue

        docker_name = docker_names[0]
        # Only the last path component can carry the tag; splitting the whole
        # reference on ":" would cut a registry host that includes a port.
        image_basename = os.path.basename(docker_name)
        repo_name, _, tag = image_basename.partition(":")
        docker_path = (
            docker_name[: len(docker_name) - len(image_basename)] + repo_name
        )
        if not docker_path:
            # Empty quoted string: nothing to report for this line.
            continue
        # Untagged references are recorded with "NA" rather than crashing.
        used_tag = tag or "NA"

        latest_tag = get_latest_local_docker_tag(docker_path)
        if latest_tag == "NA":
            # Fall back to the registry; a lookup that yields nothing is
            # recorded as "NA" as well.
            latest_tag = get_latest_remote_docker_tag(docker_path) or "NA"

        docker_detail.append(
            f"{docker_path}\t{used_tag}\t{latest_tag}\t{line_num}\t{wdl_name}"
        )

    return docker_detail
|
||
|
||
def get_latest_remote_docker_tag(docker_path: str) -> str:
    """
    Returns the latest tag of a docker as reported by its remote registry
    @param docker_path: image reference without the tag
    @return: the tag returned by the registry-specific lookup, or "NA" when
             that lookup yields nothing
    """
    # Dispatch on the registry host (first path component) so an image whose
    # name merely contains "gcr" or "quay.io" is not sent to the wrong
    # registry.
    registry_host = docker_path.split("/", 1)[0]

    if registry_host == "gcr.io" or registry_host.endswith(".gcr.io"):
        latest_tag = get_latest_tag_from_gcr(docker_path)
    elif registry_host == "quay.io":
        latest_tag = get_latest_tag_from_quayio(docker_path)
    else:
        latest_tag = get_latest_tag_from_duckerhub(docker_path)

    # Callers compare against "NA", so never hand back None.
    return latest_tag or "NA"
|
||
|
||
def get_latest_tag_from_duckerhub(docker_path: str) -> str:
    """
    Returns the most recently updated tag of an image hosted on Docker Hub
    @param docker_path: image reference without the tag
    @return: name of the first tag returned when ordering by last update, or
             "NA" when the request fails or no tag is listed
    """
    # Docker Hub files official images (single-component names such as
    # "ubuntu") under the "library" namespace.
    image_name = docker_path if "/" in docker_path else f"library/{docker_path}"
    registry_url = f"https://registry.hub.docker.com/v2/repositories/{image_name}/tags/?page_size=1&ordering=last_updated"

    try:
        # The timeout keeps one unreachable registry from stalling the run.
        with urllib.request.urlopen(registry_url, timeout=30) as response:
            json_data = json.loads(response.read().decode("utf-8"))
    except (OSError, ValueError):
        # OSError covers HTTPError/URLError and socket timeouts; ValueError
        # covers undecodable or malformed JSON. The lookup is best-effort.
        return "NA"

    tags = json_data.get("results")
    if not tags:
        return "NA"
    return tags[0].get("name") or "NA"
|
||
|
||
def get_latest_tag_from_gcr(docker_path: str) -> str:
    """
    Returns the latest tag of an image by querying the registry's tags/list
    endpoint
    @param docker_path: image reference without the tag, in the form
                        <registry host>/<project id>/<image name>
    @return: the greatest tag in the returned list, or "NA" when the path is
             malformed, the request fails or no tag is listed
    """
    # Split the image string into registry host, project ID and image name
    parts = docker_path.split("/")
    if len(parts) < 3:
        # Not enough components to build the registry URL.
        return "NA"
    gcr_repo = parts[0]
    project_id = parts[1]
    image_name = "/".join(parts[2:])
    # Construct the URL for retrieving tags
    registry_url = f"https://{gcr_repo}/v2/{project_id}/{image_name}/tags/list"

    try:
        # Send the GET request to the Container Registry API; the timeout
        # keeps one unreachable registry from stalling the run.
        with urllib.request.urlopen(registry_url, timeout=30) as response:
            json_data = json.loads(response.read().decode("utf-8"))
    except (OSError, ValueError):
        # OSError covers HTTPError/URLError and socket timeouts; ValueError
        # covers undecodable or malformed JSON. The lookup is best-effort.
        return "NA"

    tags = json_data.get("tags")
    if not tags:
        return "NA"
    # NOTE(review): max() compares tags as plain strings, so "0.1.9" ranks
    # above "0.1.10" - confirm this is the intended notion of "latest".
    return max(tags)
|
||
|
||
def get_latest_tag_from_quayio(docker_path: str) -> str:
    """
    Returns the latest tag of an image by querying the registry's tags/list
    endpoint
    @param docker_path: image reference without the tag, in the form
                        <registry host>/<project id>/<image name>
    @return: the greatest tag in the returned list, or "NA" when the path is
             malformed, the request fails or no tag is listed
    """
    # Split the image string into registry host, project ID and image name
    parts = docker_path.split("/")
    if len(parts) < 3:
        # Not enough components to build the registry URL.
        return "NA"
    quayio_repo = parts[0]
    project_id = parts[1]
    image_name = "/".join(parts[2:])
    # Construct the URL for retrieving tags
    registry_url = f"https://{quayio_repo}/v2/{project_id}/{image_name}/tags/list"

    try:
        # Send the GET request to the registry API; the timeout keeps one
        # unreachable registry from stalling the run.
        with urllib.request.urlopen(registry_url, timeout=30) as response:
            json_data = json.loads(response.read().decode("utf-8"))
    except (OSError, ValueError):
        # OSError covers HTTPError/URLError and socket timeouts; ValueError
        # covers undecodable or malformed JSON. The lookup is best-effort.
        return "NA"

    tags = json_data.get("tags")
    if not tags:
        return "NA"
    # NOTE(review): max() compares tags as plain strings, so "0.1.9" ranks
    # above "0.1.10" - confirm this is the intended notion of "latest".
    return max(tags)
|
||
|
||
def get_latest_local_docker_tag(docker_path: str, docker_dir: str = None) -> str:
    """
    Returns the latest tag of a docker as declared in its local Makefile
    @param docker_path: image reference without the tag; only its last path
                        component is used to find the image directory
    @param docker_dir: directory holding one sub-directory per image;
                       defaults to "../../docker" relative to this script
    @return: the value of the last "VERSION =" line found in the image's
             Makefile(s), or "NA" when no directory, Makefile or VERSION
             line exists
    """
    if docker_dir is None:
        # Anchor to the script location, the same way main() locates the wdl
        # directory, so the lookup does not depend on the working directory.
        docker_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "../../docker"
        )

    latest_tag = "NA"
    docker_name = os.path.basename(docker_path)
    docker_dir_path = os.path.join(docker_dir, docker_name)
    # A missing docker directory or image sub-directory means no local tag.
    if not docker_name or not os.path.isdir(docker_dir_path):
        return latest_tag

    for makefile in sorted(os.listdir(docker_dir_path)):
        if not makefile.endswith("Makefile"):
            continue

        with open(os.path.join(docker_dir_path, makefile)) as f:
            for makefile_line in f:
                if "VERSION =" in makefile_line:
                    latest_tag = makefile_line.split("=")[1].strip()

    return latest_tag
|
||
|
||
# Run the collection only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add to a help message
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done