Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added support to parse labels in dockerfile #3987

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions setup-mini.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ packages =
regipy >= 3.1.0; platform_system == 'Linux'
packagedcode_msitools >= 0.101.210706; platform_system == 'Linux'
go-inspector >= 0.5.0; platform_system == 'Linux'

rust-inspector >= 0.1.0; platform_system == 'Linux'

[options.entry_points]
console_scripts =
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ packages =
regipy >= 3.1.0; platform_system == 'Linux'
packagedcode_msitools >= 0.101.210706; platform_system == 'Linux'
go-inspector >= 0.5.0; platform_system == 'Linux'
rust-inspector >= 0.1.0; platform_system == 'Linux'


[options.entry_points]
console_scripts =
Expand Down
2 changes: 2 additions & 0 deletions src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from packagedcode import debian
from packagedcode import debian_copyright
from packagedcode import distro
from packagedcode import dockerfile
from packagedcode import conda
from packagedcode import conan
from packagedcode import cocoapods
Expand Down Expand Up @@ -95,6 +96,7 @@
debian.DebianSourcePackageTarballHandler,

distro.EtcOsReleaseHandler,
dockerfile.DockerfileHandler,

freebsd.CompactManifestHandler,

Expand Down
59 changes: 59 additions & 0 deletions src/packagedcode/dockerfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#



import io
from pathlib import Path
from dockerfile_parse import DockerfileParser
from packagedcode import models
from packagedcode import utils
import fnmatch


class DockerfileHandler(models.DatafileHandler):
    """
    Handle Dockerfile and Containerfile manifests and report the OCI image
    labels (``org.opencontainers.image.*``) they declare as package data.
    """
    datasource_id = 'dockerfile_oci_labels'

    # Lowercase filename globs recognized as Dockerfiles. Matching is done on
    # the lowercased filename so that "Dockerfile", "Containerfile" and
    # "foo.Dockerfile" are all accepted on every platform.
    _filename_patterns = (
        'dockerfile',
        'containerfile',
        '*.dockerfile',
        '*.containerfile',
    )

    # Namespace prefix of the pre-defined OCI image annotation keys.
    _oci_label_prefix = 'org.opencontainers.image.'

    @classmethod
    def is_datafile(cls, path):
        """
        Return True if the filename of ``path`` looks like a Dockerfile or a
        Containerfile, ignoring case.
        """
        filename = Path(path).name.lower()
        return any(
            fnmatch.fnmatchcase(filename, pattern)
            for pattern in cls._filename_patterns
        )

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Parse the Dockerfile at ``location`` and yield one PackageData built
        from its OCI labels.

        The name comes from the ``title`` label, the version from the
        ``version`` label and the license from the ``licenses`` label. Any of
        these is None when the label is absent.
        """
        labels = cls.extract_oci_labels_from_dockerfile(location)
        licenses = labels.get('licenses') or labels.get('license')

        # NOTE(review): the 'license_expression' and 'labels' keys are kept
        # from the original code; confirm that PackageData accepts them. They
        # may need to map to 'extracted_license_statement' and 'extra_data'.
        package_data = {
            'datasource_id': cls.datasource_id,
            'type': cls.default_package_type,
            'name': labels.get('title') or labels.get('name'),
            'version': labels.get('version'),
            'license_expression': cls._normalize_licenses(licenses),
            'labels': labels,
        }

        yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def extract_oci_labels_from_dockerfile(cls, dockerfile_path):
        """
        Return a mapping of OCI labels found in the Dockerfile at
        ``dockerfile_path``, keyed by the label name without its
        ``org.opencontainers.image.`` prefix (for instance ``source`` or
        ``licenses``). Labels outside of that namespace are not returned.
        """
        with open(dockerfile_path, 'rb') as dockerfile:
            content = dockerfile.read()

        # Parse from an in-memory file object. A DockerfileParser created
        # without arguments is bound to "./Dockerfile", and assigning its
        # ``content`` would write that file in the current working directory.
        parser = DockerfileParser(fileobj=io.BytesIO(content))

        prefix = cls._oci_label_prefix
        return {
            key[len(prefix):]: value
            for key, value in parser.labels.items()
            if key.startswith(prefix)
        }

    @staticmethod
    def _normalize_licenses(licenses):
        """
        Return a license expression string from a ``licenses`` label value,
        joining comma-separated license identifiers with " AND ". Return None
        for a missing or empty value.
        """
        if not licenses:
            return None
        identifiers = [part.strip() for part in licenses.split(',')]
        return ' AND '.join(part for part in identifiers if part) or None
Binary file not shown.
13 changes: 13 additions & 0 deletions tests/packagedcode/data/docker/containerfile-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[
{
"datasource_id": "dockerfile_oci_labels",
"type": "default",
"name": "Unknown",
"version": "Unknown",
"license_expression": "GPL-2.0-only AND BSD-2-Clause",
"labels": {
"source": "https://github.com/kubernetes-sigs/blixt",
"licenses": "GPL-2.0-only,BSD-2-Clause"
}
}
]
10 changes: 10 additions & 0 deletions tests/packagedcode/data/docker/psql-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"datasource_id": "dockerfile_oci_labels",
"type": "default",
"license_expression": "MIT",
"labels": {
"source": "https://github.com/kreneskyp/ix"
}
}
]
5 changes: 5 additions & 0 deletions tests/packagedcode/data/docker/psql.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
FROM postgres:15.3
LABEL org.opencontainers.image.source https://github.com/kreneskyp/ix

RUN apt update -y && \
apt install -y postgresql-15-pgvector \
18 changes: 18 additions & 0 deletions tests/packagedcode/data/docker/test-dockerfile-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"datasource_id": "dockerfile_oci_labels",
"type": "default",
"name": "Kanboard",
"version": "1.2.42",
"license_expression": "MIT",
"labels": {
"source": "https://github.com/kanboard/kanboard",
"title": "Kanboard",
"description": "Kanboard is project management software that focuses on the Kanban methodology",
"vendor": "Kanboard",
"licenses": "MIT",
"url": "https://kanboard.org",
"documentation": "https://docs.kanboard.org"
}
}
]
80 changes: 80 additions & 0 deletions tests/packagedcode/data/docker/test.containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#Copied from https://github.com/kubernetes-sigs/blixt/blob


FROM rust:1.79-slim-bookworm as builder

ARG TARGETARCH
ARG LLVM_VERSION=19

RUN apt-get update
RUN apt-get install --yes \
build-essential \
protobuf-compiler \
pkg-config \
musl-tools \
clang \
wget

RUN apt install --yes lsb-release software-properties-common gnupg
RUN wget -O /tmp/llvm.sh https://apt.llvm.org/llvm.sh
RUN chmod +x /tmp/llvm.sh
RUN /bin/sh -c "/tmp/llvm.sh ${LLVM_VERSION} all"

RUN rustup default stable
RUN rustup install nightly
RUN rustup component add rust-src --toolchain nightly
RUN --mount=type=cache,target=/root/.cargo/registry \
cargo install bpf-linker

WORKDIR /workspace
# Docker uses the amd64/arm64 convention while Rust uses the x86_64/aarch64 convention.
# Since Dockerfile doesn't support conditional variables (sigh), write the arch in Rust's
# convention to a file for later usage.
RUN if [ "$TARGETARCH" = "amd64" ]; \
then echo "x86_64" >> arch; \
else echo "aarch64" >> arch; \
fi
RUN rustup target add $(eval cat arch)-unknown-linux-musl

COPY dataplane dataplane
COPY tools/udp-test-server tools/udp-test-server
COPY xtask xtask
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock
COPY .cargo .cargo

# We need to tell bpf-linker where it can find LLVM's shared library file.
# Ref: https://github.com/aya-rs/rustc-llvm-proxy/blob/cbcb3c6/src/lib.rs#L48
ENV LD_LIBRARY_PATH="/usr/lib/llvm-$LLVM_VERSION/lib"
ENV CC_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang"
ENV AR_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar"
ENV CC_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang"
ENV AR_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar"
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_RUSTFLAGS="-Clink-self-contained=yes -Clinker=rust-lld"

RUN --mount=type=cache,target=/workspace/target/ \
--mount=type=cache,target=/root/.cargo/registry \
cargo xtask build-ebpf --release
RUN --mount=type=cache,target=/workspace/target/ \
--mount=type=cache,target=/root/.cargo/registry \
RUSTFLAGS=-Ctarget-feature=+crt-static cargo build \
--workspace \
--exclude ebpf \
--release \
--target=$(eval cat arch)-unknown-linux-musl
RUN --mount=type=cache,target=/workspace/target/ \
cp /workspace/target/$(eval cat arch)-unknown-linux-musl/release/loader /workspace/dataplane-release

FROM alpine

LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt
LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause

WORKDIR /opt/blixt/

COPY --from=builder /workspace/dataplane-release /opt/blixt/dataplane

COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0
COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause

ENTRYPOINT ["/opt/blixt/dataplane"]
36 changes: 36 additions & 0 deletions tests/packagedcode/data/docker/test.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#Copied from https://github.com/kanboard/kanboard

FROM alpine:3.21

LABEL org.opencontainers.image.source https://github.com/kanboard/kanboard
LABEL org.opencontainers.image.title=Kanboard
LABEL org.opencontainers.image.description="Kanboard is project management software that focuses on the Kanban methodology"
LABEL org.opencontainers.image.vendor=Kanboard
LABEL org.opencontainers.image.licenses=MIT
LABEL org.opencontainers.image.url=https://kanboard.org
LABEL org.opencontainers.image.documentation=https://docs.kanboard.org

VOLUME /var/www/app/data
VOLUME /var/www/app/plugins
VOLUME /etc/nginx/ssl

EXPOSE 80 443

ARG VERSION

RUN apk --no-cache --update add \
tzdata openssl unzip nginx bash ca-certificates s6 curl ssmtp mailx php83 php83-phar php83-curl \
php83-fpm php83-json php83-zlib php83-xml php83-dom php83-ctype php83-opcache php83-zip php83-iconv \
php83-pdo php83-pdo_mysql php83-pdo_sqlite php83-pdo_pgsql php83-mbstring php83-session php83-bcmath \
php83-gd php83-openssl php83-sockets php83-posix php83-ldap php83-simplexml php83-xmlwriter && \
rm -rf /var/www/localhost && \
rm -f /etc/php83/php-fpm.d/www.conf && \
ln -sf /usr/bin/php83 /usr/bin/php

ADD . /var/www/app
ADD docker/ /

RUN rm -rf /var/www/app/docker && echo $VERSION > /var/www/app/app/version.txt

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD []
60 changes: 60 additions & 0 deletions tests/packagedcode/test_dockerfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from packagedcode import dockerfile
import pytest
import os.path
import json
from pathlib import Path
from packagedcode.dockerfile import DockerfileHandler

class TestDockerfileHandler:
    """
    Tests for DockerfileHandler using the fixtures under data/docker/ next to
    this test module.
    """

    # Pairs of (Dockerfile fixture, expected JSON fixture) in data/docker/.
    FIXTURES = [
        ('test.dockerfile', 'test-dockerfile-expected.json'),
        ('test.containerfile', 'containerfile-expected.json'),
        ('psql.dockerfile', 'psql-expected.json'),
    ]

    def get_test_loc(self, path):
        """
        Return the location of ``path``, resolved relative to the directory
        that contains this test module.
        """
        return Path(os.path.join(os.path.dirname(__file__), path))

    def load_expected(self, expected_file):
        """
        Return the data loaded from the ``expected_file`` JSON file.
        """
        with open(expected_file) as f:
            return json.load(f)

    def test_is_datafile(self):
        for dockerfile, _expected in self.FIXTURES:
            test_file = self.get_test_loc(f'data/docker/{dockerfile}')
            assert DockerfileHandler.is_datafile(str(test_file))

    def test_parse_dockerfile(self):
        for dockerfile, expected in self.FIXTURES:
            test_file = self.get_test_loc(f'data/docker/{dockerfile}')
            expected_loc = self.get_test_loc(f'data/docker/{expected}')
            # Serialize the parsed packages: PackageData objects never compare
            # equal to the plain mappings loaded from the expected JSON.
            packages = [
                package.to_dict()
                for package in DockerfileHandler.parse(str(test_file))
            ]
            expected_packages = self.load_expected(expected_loc)
            assert len(packages) == len(expected_packages)
            # The expected files list only a subset of the package fields, so
            # only these fields are compared.
            for package, expected_package in zip(packages, expected_packages):
                for key, value in expected_package.items():
                    assert package.get(key) == value

    def test_extract_oci_labels_from_dockerfile(self):
        for dockerfile, expected in self.FIXTURES:
            dockerfile_path = self.get_test_loc(f'data/docker/{dockerfile}')
            expected_loc = self.get_test_loc(f'data/docker/{expected}')
            labels = DockerfileHandler.extract_oci_labels_from_dockerfile(
                str(dockerfile_path)
            )
            expected_labels = self.load_expected(expected_loc)[0]['labels']
            assert labels == expected_labels
Loading