-
Notifications
You must be signed in to change notification settings - Fork 559
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(uv): parse the dist-manifest.json to not hardcode sha256 in rules_python #2578
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,32 +22,177 @@ load(":uv_repositories.bzl", "uv_repositories") | |
|
||
_DOC = """\ | ||
A module extension for working with uv. | ||
|
||
Use it in your own setup by: | ||
```starlark | ||
uv = use_extension( | ||
"@rules_python//python/uv:uv.bzl", | ||
"uv", | ||
dev_dependency = True, | ||
) | ||
uv.toolchain( | ||
name = "uv_toolchains", | ||
version = "0.5.24", | ||
) | ||
use_repo(uv, "uv_toolchains") | ||
|
||
register_toolchains( | ||
"@uv_toolchains//:all", | ||
dev_dependency = True, | ||
) | ||
``` | ||
|
||
Since this is only for locking the requirements files, it should be always | ||
marked as a `dev_dependency`. | ||
""" | ||
|
||
_DIST_MANIFEST_JSON = "dist-manifest.json" | ||
_DEFAULT_BASE_URL = "https://github.com/astral-sh/uv/releases/download" | ||
|
||
config = tag_class( | ||
doc = "Configure where the binaries are going to be downloaded from.", | ||
attrs = { | ||
"base_url": attr.string( | ||
doc = "Base URL to download metadata about the binaries and the binaries themselves.", | ||
default = _DEFAULT_BASE_URL, | ||
), | ||
}, | ||
) | ||
|
||
platform = tag_class( | ||
doc = "Configure the available platforms for lock file generation.", | ||
attrs = { | ||
"compatible_with": attr.label_list( | ||
doc = "The compatible with constraint values for toolchain resolution", | ||
), | ||
"name": attr.string( | ||
doc = "The platform string used in the UV repository to denote the platform triple.", | ||
mandatory = True, | ||
), | ||
}, | ||
) | ||
|
||
uv_toolchain = tag_class( | ||
doc = "Configure uv toolchain for lock file generation.", | ||
attrs = { | ||
"uv_version": attr.string(doc = "Explicit version of uv.", mandatory = True), | ||
"name": attr.string( | ||
doc = "The name of the toolchain repo", | ||
default = "uv_toolchains", | ||
), | ||
"version": attr.string( | ||
doc = "Explicit version of uv.", | ||
mandatory = True, | ||
), | ||
}, | ||
) | ||
|
||
def _uv_toolchain_extension(module_ctx): | ||
config = { | ||
"platforms": {}, | ||
} | ||
|
||
for mod in module_ctx.modules: | ||
if not mod.is_root and not mod.name == "rules_python": | ||
# Only rules_python and the root module can configure this. | ||
# | ||
# Ignore any attempts to configure the `uv` toolchain elsewhere | ||
# | ||
# Only the root module may configure the uv toolchain. | ||
# This prevents conflicting registrations with any other modules. | ||
# | ||
# NOTE: We may wish to enforce a policy where toolchain configuration is only allowed in the root module, or in rules_python. See https://github.com/bazelbuild/bazel/discussions/22024 | ||
continue | ||
|
||
# Note, that the first registration will always win, giving priority to | ||
# the root module. | ||
|
||
for platform_attr in mod.tags.platform: | ||
config["platforms"].setdefault(platform_attr.name, struct( | ||
name = platform_attr.name.replace("-", "_").lower(), | ||
compatible_with = platform_attr.compatible_with, | ||
)) | ||
|
||
for config_attr in mod.tags.config: | ||
config.setdefault("base_url", config_attr.base_url) | ||
|
||
for toolchain in mod.tags.toolchain: | ||
if not mod.is_root: | ||
fail( | ||
"Only the root module may configure the uv toolchain.", | ||
"This prevents conflicting registrations with any other modules.", | ||
"NOTE: We may wish to enforce a policy where toolchain configuration is only allowed in the root module, or in rules_python. See https://github.com/bazelbuild/bazel/discussions/22024", | ||
) | ||
|
||
uv_repositories( | ||
uv_version = toolchain.uv_version, | ||
register_toolchains = False, | ||
config.setdefault("version", toolchain.version) | ||
config.setdefault("name", toolchain.name) | ||
|
||
if not config["version"]: | ||
return | ||
|
||
config.setdefault("base_url", _DEFAULT_BASE_URL) | ||
config["urls"] = _get_tool_urls_from_dist_manifest( | ||
module_ctx, | ||
base_url = "{base_url}/{version}".format(**config), | ||
) | ||
uv_repositories( | ||
name = config["name"], | ||
platforms = config["platforms"], | ||
urls = config["urls"], | ||
version = config["version"], | ||
) | ||
|
||
def _get_tool_urls_from_dist_manifest(module_ctx, *, base_url): | ||
"""Download the results about remote tool sources. | ||
|
||
This relies on the tools using the cargo packaging to infer the actual | ||
sha256 values for each binary. | ||
""" | ||
dist_manifest = module_ctx.path(_DIST_MANIFEST_JSON) | ||
module_ctx.download(base_url + "/" + _DIST_MANIFEST_JSON, output = dist_manifest) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We probably want a sha for the dist manifest download? Using untrusted shas to verify a download is equivalent to not using shas There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Regarding the This has been inspired by https://github.com/bazel-contrib/rules_go/blob/7f6a9bf5870f2b5ffbba1615658676dcabf9edc7/go/private/sdk.bzl#L84 where Maybe we should instead an optional There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I think I see your point now. Hm. On the one hand, not using shas means, if someone MITM the manifest, then they can control what actual uv stuff is downloaded and then used. On the other hand, using a sha means we are tied to a particular manifest and don't benefit from the automatic-ness of using the manifest. Which is pretty appealing behavior. Hm, not sure how, or if, we can split the difference. These seem at odds. Also:
|
||
dist_manifest = json.decode(module_ctx.read(dist_manifest)) | ||
|
||
artifacts = dist_manifest["artifacts"] | ||
tool_sources = {} | ||
downloads = {} | ||
for fname, artifact in artifacts.items(): | ||
if artifact.get("kind") != "executable-zip": | ||
continue | ||
|
||
checksum = artifacts[artifact["checksum"]] | ||
checksum_fname = checksum["name"] | ||
checksum_path = module_ctx.path(checksum_fname) | ||
downloads[checksum_path] = struct( | ||
download = module_ctx.download( | ||
"{}/{}".format(base_url, checksum_fname), | ||
output = checksum_path, | ||
block = False, | ||
), | ||
archive_fname = fname, | ||
platforms = checksum["target_triples"], | ||
) | ||
|
||
for checksum_path, download in downloads.items(): | ||
result = download.download.wait() | ||
if not result.success: | ||
fail(result) | ||
|
||
archive_fname = download.archive_fname | ||
|
||
sha256, _, checksummed_fname = module_ctx.read(checksum_path).partition(" ") | ||
checksummed_fname = checksummed_fname.strip(" *\n") | ||
if archive_fname != checksummed_fname: | ||
fail("The checksum is for a different file, expected '{}' but got '{}'".format( | ||
archive_fname, | ||
checksummed_fname, | ||
)) | ||
|
||
for platform in download.platforms: | ||
tool_sources[platform] = struct( | ||
urls = ["{}/{}".format(base_url, archive_fname)], | ||
sha256 = sha256, | ||
) | ||
|
||
return tool_sources | ||
|
||
uv = module_extension( | ||
doc = _DOC, | ||
implementation = _uv_toolchain_extension, | ||
tag_classes = {"toolchain": uv_toolchain}, | ||
tag_classes = { | ||
"config": config, | ||
"platform": platform, | ||
"toolchain": uv_toolchain, | ||
}, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Something that isn't clear from the doc here is how the word "for" is operating.
Does this define the platforms uv will run on, or the platform that uv will generate a lockfile for? I'm assuming the former.