Skip to content

Commit

Permalink
Automatically update OSV from CVE Services
Browse files Browse the repository at this point in the history
  • Loading branch information
sethmlarson authored Jun 14, 2024
1 parent 43e5315 commit 99d895e
Show file tree
Hide file tree
Showing 4 changed files with 539 additions and 51 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/sync.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Daily (and manually triggerable) job that runs tools/import-from-cve.py and
# opens a pull request with whatever advisory changes the script produced.
name: Sync
on:
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * *"
jobs:
  update-osv-from-cve:
    name: "Update OSV from CVE"
    runs-on: ubuntu-latest
    permissions:
      contents: write
      # create-pull-request pushes a branch (contents) AND opens the PR;
      # once a permissions block is present, unlisted scopes default to none.
      pull-requests: write
    steps:
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
      - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4
        with:
          # Quoted so YAML keeps the version as a string instead of a float.
          python-version: "3.12"

      - run: |
          python -m pip install -r tools/requirements.txt
          python tools/import-from-cve.py
        env:
          CVE_ENV: "prod"
          CVE_USERNAME: "[email protected]"
          CVE_API_TOKEN: ${{ secrets.CVE_API_TOKEN }}
      - uses: peter-evans/create-pull-request@6d6857d36972b65feb161a90e484f2984215f83e # v6
        with:
          title: "Update OSV records from CVE"
          commit-message: "Update OSV records from CVE"
          reviewers: sethmlarson,ewdurbin
129 changes: 84 additions & 45 deletions tools/import-from-cve.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,77 @@
"""Tool which imports OSV data from PSF CVE Numbering Authority CVEs"""

import copy
import json
import os
import re
import sys
import typing
from pathlib import Path

import cvelib.cve_api
import osv_utils
import urllib3

http = urllib3.PoolManager()
# Directory named "advisories" located two levels above this file.
ADVISORIES_DIR = Path(__file__).parent.parent / "advisories"
# Credentials are read from the environment at import time; a missing
# CVE_API_TOKEN or CVE_USERNAME raises KeyError before anything else runs.
CVE_API_TOKEN = os.environ["CVE_API_TOKEN"]
CVE_USERNAME = os.environ["CVE_USERNAME"]
# Falls back to "prod" when CVE_ENV is not set.
CVE_ENV = os.environ.get("CVE_ENV", "prod")

# Shared urllib3 connection pool for HTTP requests made by this module.
HTTP = urllib3.PoolManager()
# cvelib API client for the "PSF" org, configured from the values above.
CVE_API = cvelib.cve_api.CveApi(
    org="PSF",
    username=CVE_USERNAME,
    api_key=CVE_API_TOKEN,
    env=CVE_ENV,
)


def main() -> None:
    """Update the OSV record for every published CPython CVE.

    The script takes no command-line arguments: the CVEs to process come from
    ``published_cpython_cves()``. The former single-CVE call that read
    ``sys.argv[1]`` is dropped because it raised ``IndexError`` whenever the
    script was run without arguments.
    """
    for cve_id, cve_record in published_cpython_cves():
        update_osv_from_cve(cve_id, cve_record)


def published_cpython_cves() -> typing.Iterable[tuple[str, dict[str, typing.Any]]]:
    """Iterate over the list of published CVEs for CPython.

    Lists every CVE in the ``PUBLISHED`` state through ``CVE_API``, fetches
    its full record, and keeps only records where at least one ``affected``
    entry names the product ``"CPython"``.

    Yields:
        ``(cve_id, cve_record)`` pairs, where ``cve_record`` is the record
        returned by ``CVE_API.show_cve_record``.
    """
    for cve_ref in CVE_API.list_cves(state="PUBLISHED"):
        cve_id = cve_ref["cve_id"]
        cve_record = CVE_API.show_cve_record(cve_id)

        # Skip non-CPython CVEs. An affected entry is not required to carry a
        # "product" key (it may be identified by package name instead), so use
        # .get() to treat such entries as "not CPython" rather than crashing.
        affected_entries = cve_record["containers"]["cna"]["affected"]
        if not any(entry.get("product") == "CPython" for entry in affected_entries):
            continue

        yield cve_id, cve_record


def fetch_osv_from_cve(cve_id):
# Fetch the CVE JSON from the GitHub mirror.
CVE, year, id = cve_id.split("-")
assert CVE == "CVE", cve_id
id_prefix = id[:-3] + "xxx"
resp = http.request(
"GET",
f"https://raw.githubusercontent.com/CVEProject/cvelistV5/main/cves/{year}/{id_prefix}/{cve_id}.json",
def update_osv_from_cve(cve_id: str, cve_record: dict[str, typing.Any]) -> None:
osv_id = osv_utils.get_osv_id(
"python", lambda osv: cve_id in osv.get("aliases", ())
)
if resp.status == 404:
return
assert resp.status == 200, resp.status
cve_json = resp.json()
cve_cna = cve_json["containers"]["cna"]
cve_meta = cve_json["cveMetadata"]

# If this is a new OSV record then we rely on the allocator.
if not osv_id:
osv_id = f"PSF-0000-{cve_id}"
osv_json = {
"schema_version": "1.5.0",
"id": osv_id,
"aliases": [cve_id],
}

# Otherwise we can load the existing OSV record
else:
osv_json = json.loads((ADVISORIES_DIR / f"python/{osv_id}.json").read_text())

# Keep track of the existing JSON, so we don't end up updating the file
# with noisy formatting changes. Unfortunately some OSV tooling is opinionated about formatting.
existing_osv_json = copy.deepcopy(osv_json)

# Start updating the OSV record with CVE record data.
cve_cna = cve_record["containers"]["cna"]
cve_meta = cve_record["cveMetadata"]

details = None
if "descriptions" in cve_cna:
Expand All @@ -41,32 +82,20 @@ def fetch_osv_from_cve(cve_id):
for problem_type in cve_cna.get("problemTypes", []):
cwe_ids.extend(problem_type.get("cwdId", []))

osv_id = osv_utils.get_osv_id(
"python", lambda osv: cve_id in osv.get("aliases", ())
git_commit_re = re.compile(
r"https://github.com/python/cpython/commit/([a-f0-9]{20,})"
)
if not osv_id:
osv_id = f"PSF-0000-{cve_id}"
osv_json = {
"schema_version": "1.5.0",
"id": osv_id,
"aliases": [cve_id],
"published": f"{cve_meta['datePublished'].rstrip('Z')}Z",
"modified": f"{cve_meta['dateUpdated'].rstrip('Z')}Z",
"details": details,
"database_specific": {"cwe_ids": cwe_ids},
}

fixed_events = []
references = []
for ref in cve_cna["references"]:
ref_tags = ref.get("tags", ())
ref_type = "WEB"
ref_type = "WEB" # Default reference type for OSV.

if "patch" in ref_tags:
ref_type = "FIX"
fixed_events.append(
{
"fixed": re.search(
r"https://github.com/python/cpython/commit/([a-f0-9]{20,})",
"fixed": git_commit_re.search(
ref["url"],
).group(1)
}
Expand All @@ -77,22 +106,32 @@ def fetch_osv_from_cve(cve_id):
ref_type = "REPORT"
references.append({"type": ref_type, "url": ref["url"]})

osv_json["affected"] = [
osv_json.update(
{
"ranges": [
"published": f"{cve_meta['datePublished'].rstrip('Z')}Z",
"modified": f"{cve_meta['dateUpdated'].rstrip('Z')}Z",
"details": details,
"affected": [
{
"type": "GIT",
"repo": "https://github.com/python/cpython",
"events": [{"introduced": "0"}, *fixed_events],
"ranges": [
{
"type": "GIT",
"events": [{"introduced": "0"}, *fixed_events],
"repo": "https://github.com/python/cpython",
}
]
}
]
],
"references": references,
"database_specific": {"cwe_ids": cwe_ids},
}
]
osv_json["references"] = references
)

with (ADVISORIES_DIR / f"python/{osv_id}.json").open("w") as f:
f.truncate()
f.write(json.dumps(osv_json, indent=2))
# Only update if there's data differences.
if existing_osv_json != osv_json:
with (ADVISORIES_DIR / f"python/{osv_id}.json").open("w") as f:
f.truncate()
f.write(json.dumps(osv_json, indent=2))


if __name__ == "__main__":
Expand Down
6 changes: 5 additions & 1 deletion tools/requirements.in
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
urllib3
--only-binary :all:

cvelib
urllib3<2
pygithub
Loading

0 comments on commit 99d895e

Please sign in to comment.