Skip to content

Commit

Permalink
Add include/exclude filter support for pull-through caching
Browse files Browse the repository at this point in the history
fixes: #706
  • Loading branch information
gerrod3 committed Oct 16, 2024
1 parent 0f92af4 commit 375bf2f
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGES/706.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pull-through caching now respects the include/exclude filters on the upstream remote.
3 changes: 3 additions & 0 deletions docs/user/guides/publish.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ from the remote source and have Pulp store that package as orphaned content.
pulp python distribution update --name foo --remote bar
```

!!! note
Pull-through caching will respect the includes/excludes filters on the supplied remote.

!!! warning
Support for pull-through caching is provided as a tech preview in Pulp 3.
Functionality may not work or may be incomplete. Also, backwards compatibility when upgrading
Expand Down
52 changes: 31 additions & 21 deletions pulp_python/app/pypi/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import logging
import requests
import os

from rest_framework.viewsets import ViewSet
from rest_framework.response import Response
Expand All @@ -15,15 +16,16 @@
Http404,
HttpResponseForbidden,
HttpResponseBadRequest,
StreamingHttpResponse
StreamingHttpResponse,
HttpResponse,
)
from drf_spectacular.utils import extend_schema
from dynaconf import settings
from itertools import chain
from packaging.utils import canonicalize_name
from urllib.parse import urljoin, urlparse, urlunsplit
from pathlib import PurePath
from pypi_simple import parse_links_stream_response
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage

from pulpcore.plugin.viewsets import OperationPostponedResponse
from pulpcore.plugin.tasking import dispatch
Expand All @@ -45,6 +47,7 @@
python_content_to_json,
PYPI_LAST_SERIAL,
PYPI_SERIAL_CONSTANT,
get_remote_package_filter,
)

from pulp_python.app import tasks
Expand Down Expand Up @@ -232,27 +235,34 @@ def list(self, request, path):

def pull_through_package_simple(self, package, path, remote):
    """
    Build the package's simple page from the upstream remote.

    The remote's include/exclude filters are applied twice: to the project name
    before anything is downloaded, and to every release listed upstream.

    Args:
        package: name of the project whose simple page was requested.
        path: path segment placed in front of each filename in the generated links.
        remote: remote used to download the upstream simple page.

    Returns:
        HttpResponse containing the rendered simple detail page.

    Raises:
        Http404: if the project is rejected by the remote's filters, or the
            upstream page could not be downloaded.
    """
    def parse_package(dis_package):
        """Return a (filename, redirect url, sha256) tuple for one upstream file."""
        parsed = urlparse(dis_package.url)
        # Keep scheme, netloc and path only; the query and fragment are blanked.
        stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
        redirect_path = f'{path}/{dis_package.filename}?redirect={stripped_url}'
        d_url = urljoin(self.base_content_url, redirect_path)
        return dis_package.filename, d_url, dis_package.digests.get("sha256", "")

    rfilter = get_remote_package_filter(remote)
    if not rfilter.filter_project(package):
        raise Http404(f"{package} does not exist.")

    url = remote.get_remote_artifact_url(f'simple/{package}/')
    # Add the Accept header on a fresh list so the list object the remote was
    # loaded with is never mutated in place.
    remote.headers = [*(remote.headers or []), {"Accept": ACCEPT_JSON_PREFERRED}]
    downloader = remote.get_downloader(url=url, max_retries=1)
    try:
        d = downloader.fetch()
    except Exception as exc:
        # Any download failure is reported to the client as a missing project.
        raise Http404(f"Could not find {package}.") from exc

    # Compare only the media type: tolerate a missing header, parameters such
    # as "; charset=utf-8", and differences in letter case.
    content_type = (d.headers.get("content-type") or "").split(";", 1)[0].strip().lower()
    # Use a context manager so the downloaded file handle is always closed.
    with open(d.path, "rb") as page_file:
        if content_type == "application/vnd.pypi.simple.v1+json":
            page = ProjectPage.from_json_data(json.load(page_file), base_url=remote.url)
        else:
            page = ProjectPage.from_html(package, page_file.read(), base_url=remote.url)

    packages = [
        parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version)
    ]
    return HttpResponse(write_simple_detail(package, packages))

@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
def retrieve(self, request, path, package):
Expand Down
78 changes: 77 additions & 1 deletion pulp_python/app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from django.conf import settings
from jinja2 import Template
from packaging.utils import canonicalize_name
from packaging.version import parse
from packaging.requirements import Requirement
from packaging.version import parse, InvalidVersion


PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
Expand Down Expand Up @@ -317,3 +318,78 @@ def write_simple_detail(project_name, project_packages, streamed=False):
detail = Template(simple_detail_template)
context = {"project_name": project_name, "project_packages": project_packages}
return detail.stream(**context) if streamed else detail.render(**context)


class PackageIncludeFilter:
    """Helper that filters projects and releases by a remote's includes/excludes."""

    def __init__(self, remote):
        """Parse the ``includes`` and ``excludes`` lists of ``remote.cast()``."""
        self.remote = remote.cast()
        self._filter_includes = self._parse_packages(self.remote.includes)
        self._filter_excludes = self._parse_packages(self.remote.excludes)

    def _parse_packages(self, packages):
        """
        Group requirement strings by kind and canonical project name.

        Returns a nested mapping with up to two top-level keys: ``"range"`` for
        requirements that carry a version specifier and ``"full"`` for bare
        project names. Each maps a canonical project name to a list of
        ``Requirement`` objects.
        """
        config = defaultdict(lambda: defaultdict(list))
        # ``or ()`` tolerates a filter list that is None instead of empty.
        for value in packages or ():
            requirement = Requirement(value)
            requirement.name = canonicalize_name(requirement.name)
            if requirement.specifier:
                # Let pre-release versions be matched by the specifier too.
                requirement.specifier.prereleases = True
                config["range"][requirement.name].append(requirement)
            else:
                config["full"][requirement.name].append(requirement)
        return config

    def filter_project(self, project_name):
        """
        Return whether ``project_name`` is allowed through the remote's filters.

        A project passes when the include list is empty or names it (with or
        without a specifier), and no specifier-less exclude entry names it.
        """
        project_name = canonicalize_name(project_name)
        include_full = self._filter_includes.get("full", {})
        include_range = self._filter_includes.get("range", {})
        include = set(include_range.keys()).union(include_full.keys())
        if include and project_name not in include:
            return False

        exclude_full = self._filter_excludes.get("full", {})
        if project_name in exclude_full:
            return False

        return True

    def filter_release(self, project_name, version):
        """
        Return whether the given release is allowed through the remote's filters.

        ``version`` is a version string. A value that cannot be parsed as a
        version, including ``None``, is rejected instead of raising.
        """
        project_name = canonicalize_name(project_name)
        if not self.filter_project(project_name):
            return False

        try:
            version = parse(version)
        except (InvalidVersion, TypeError):
            # TypeError covers a non-string version such as None.
            return False

        # When ranged includes exist for the project, at least one must match.
        include_range = self._filter_includes.get("range", {})
        if project_name in include_range:
            if not any(version in req.specifier for req in include_range[project_name]):
                return False

        # Any matching ranged exclude rejects the release.
        exclude_range = self._filter_excludes.get("range", {})
        if project_name in exclude_range:
            if any(version in req.specifier for req in exclude_range[project_name]):
                return False

        return True


# Cache of remote.pulp_id -> (remote.pulp_last_updated, PackageIncludeFilter).
_remote_filters = {}


def get_remote_package_filter(remote):
    """
    Return the PackageIncludeFilter for ``remote``, reusing a cached one if possible.

    The cached filter is reused only while the stored ``pulp_last_updated`` value
    equals the remote's current one; otherwise a new filter is built and stored.
    """
    cache_key = remote.pulp_id
    cached_entry = _remote_filters.get(cache_key)
    if cached_entry:
        cached_stamp = cached_entry[0]
        if cached_stamp == remote.pulp_last_updated:
            return cached_entry[1]

    fresh_filter = PackageIncludeFilter(remote)
    _remote_filters[cache_key] = (remote.pulp_last_updated, fresh_filter)
    return fresh_filter
41 changes: 41 additions & 0 deletions pulp_python/tests/functional/api/test_full_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
from pulp_python.tests.functional.constants import (
PYPI_URL,
PYTHON_XS_FIXTURE_CHECKSUMS,
PYTHON_SM_PROJECT_SPECIFIER,
PYTHON_SM_FIXTURE_RELEASES,
)

from pypi_simple import ProjectPage
from packaging.version import parse
from urllib.parse import urljoin, urlsplit


Expand Down Expand Up @@ -54,6 +57,44 @@ def test_pull_through_simple(python_remote_factory, python_distribution_factory,
assert PYTHON_XS_FIXTURE_CHECKSUMS[package.filename] == package.digests["sha256"]


@pytest.mark.parallel
def test_pull_through_filter(python_remote_factory, python_distribution_factory):
    """Check that pull-through honours the includes/excludes filters of the remote."""

    def simple_url(distribution, project):
        """Return the simple-page URL of ``project`` under ``distribution``."""
        return f"{distribution.base_url}simple/{project}/"

    def assert_not_found(distribution, project, detail):
        """Request the simple page and expect a 404 carrying ``detail``."""
        response = requests.get(simple_url(distribution, project))
        assert response.status_code == 404
        assert response.json() == {'detail': detail}

    def assert_found(distribution, project):
        """Request the simple page and expect a 200."""
        response = requests.get(simple_url(distribution, project))
        assert response.status_code == 200

    # Plain include: only the listed project is served.
    include_remote = python_remote_factory(url=PYPI_URL, includes=["shelf-reader"])
    include_distro = python_distribution_factory(remote=include_remote.pulp_href)
    assert_not_found(include_distro, "pulpcore", 'pulpcore does not exist.')
    assert_found(include_distro, "shelf-reader")

    # Test complex include specifiers
    specifier_remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
    specifier_distro = python_distribution_factory(remote=specifier_remote.pulp_href)
    for project, expected_files in PYTHON_SM_FIXTURE_RELEASES.items():
        response = requests.get(simple_url(specifier_distro, project))
        page = ProjectPage.from_response(response, project)
        # Pre-releases are left out of the comparison.
        final_files = set()
        for dist in page.packages:
            if not parse(dist.version).is_prerelease:
                final_files.add(dist.filename)
        assert final_files == set(expected_files)

    # Test exclude logic
    exclude_remote = python_remote_factory(includes=[], excludes=["django"])
    exclude_distro = python_distribution_factory(remote=exclude_remote.pulp_href)
    assert_not_found(exclude_distro, "django", 'django does not exist.')
    assert_not_found(exclude_distro, "pulpcore", 'Could not find pulpcore.')
    assert_found(exclude_distro, "shelf-reader")


@pytest.mark.parallel
def test_pull_through_with_repo(
python_repo_with_sync, python_remote_factory, python_distribution_factory
Expand Down

0 comments on commit 375bf2f

Please sign in to comment.