Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extraction of license text from files. #193

Merged
merged 18 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ $ yarn cyclonedx
This causes information loss in trade-off shorter PURLs, which might improve ingesting these strings.
--output-reproducible Whether to go the extra mile and make the output reproducible.
This might result in loss of time- and random-based values.
--gather-license-texts Search for license files in components and include them as license evidence.
jkowalleck marked this conversation as resolved.
Show resolved Hide resolved
--verbose,-v Increase the verbosity of messages.
Use multiple times to increase the verbosity even more.

Expand Down
44 changes: 44 additions & 0 deletions src/_helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Copyright (c) OWASP Foundation. All Rights Reserved.

import { xfs } from '@yarnpkg/fslib'
import GitHost from 'hosted-git-info'
import { extname, parse } from 'path'

export async function writeAllSync (fd: number, data: string): Promise<number> {
const b = Buffer.from(data)
Expand Down Expand Up @@ -59,3 +60,46 @@ export function trySanitizeGitUrl (gitUrl: string): string {
gitInfo.auth = undefined
return gitInfo.toString()
}

// region MIME

/** A media type (MIME type) string, e.g. `text/plain`. */
export type MimeType = string

const MIME_TEXT_PLAIN: MimeType = 'text/plain'

/**
 * Lookup table: lower-cased file extension (including the leading dot,
 * or the empty string for "no extension") to the MIME type of a text file.
 */
const MAP_TEXT_EXTENSION_MIME: Readonly<Record<string, MimeType>> = {
  '': MIME_TEXT_PLAIN,
  // https://www.iana.org/assignments/media-types/media-types.xhtml
  '.csv': 'text/csv',
  '.htm': 'text/html',
  '.html': 'text/html',
  '.md': 'text/markdown',
  '.txt': MIME_TEXT_PLAIN,
  '.rst': 'text/prs.fallenstein.rst',
  '.xml': 'text/xml', // not `application/xml` -- our scope is text!
  // add more mime types above this line. pull-requests welcome!
  // license-specific files
  '.license': MIME_TEXT_PLAIN,
  '.licence': MIME_TEXT_PLAIN
} as const

/**
 * Determine the MIME type of a text file, based on its file extension only.
 *
 * @param filename - file name; the extension is matched case-insensitively
 * @returns the MIME type, or `undefined` when the extension is not a known text extension
 */
export function getMimeForTextFile (filename: string): MimeType | undefined {
  const extension = extname(filename).toLowerCase()
  return MAP_TEXT_EXTENSION_MIME[extension]
}

/** Lower-cased base names (file name without extension) that denote a license file. */
const LICENSE_FILENAME_BASE = new Set(['licence', 'license'])
/** Lower-cased extensions that, combined with a license base name, denote a plain-text license. */
const LICENSE_FILENAME_EXT = new Set([
  '.apache',
  '.bsd',
  '.gpl',
  '.mit'
])

/**
 * Determine the MIME type of a license file.
 *
 * Names like `LICENSE.MIT` or `licence.apache` are treated as plain text;
 * everything else falls back to the extension-based lookup for text files.
 *
 * @param filename - file name; matched case-insensitively
 * @returns the MIME type, or `undefined` when the file is not a known text file
 */
export function getMimeForLicenseFile (filename: string): MimeType | undefined {
  const parsed = parse(filename.toLowerCase())
  if (LICENSE_FILENAME_BASE.has(parsed.name) && LICENSE_FILENAME_EXT.has(parsed.ext)) {
    return MIME_TEXT_PLAIN
  }
  return MAP_TEXT_EXTENSION_MIME[parsed.ext]
}

// endregion MIME
97 changes: 81 additions & 16 deletions src/builders.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ Copyright (c) OWASP Foundation. All Rights Reserved.

// import submodules directly, to prevent loading unused, non-tree-shakable dependencies - like 'AJV'
import type { FromNodePackageJson as PJB } from '@cyclonedx/cyclonedx-library/Builders'
import { ComponentType, ExternalReferenceType, LicenseAcknowledgement } from '@cyclonedx/cyclonedx-library/Enums'
import { AttachmentEncoding, ComponentType, ExternalReferenceType, LicenseAcknowledgement } from '@cyclonedx/cyclonedx-library/Enums'
import type { FromNodePackageJson as PJF } from '@cyclonedx/cyclonedx-library/Factories'
import { Bom, Component, ExternalReference, type License, Property, Tool } from '@cyclonedx/cyclonedx-library/Models'
import { Attachment, Bom, Component, ComponentEvidence, ExternalReference, type License, NamedLicense, Property, Tool } from '@cyclonedx/cyclonedx-library/Models'
import { BomUtility } from '@cyclonedx/cyclonedx-library/Utils'
import { Cache, type FetchOptions, type Locator, type LocatorHash, type Package, type Project, structUtils, ThrowReport, type Workspace, YarnVersion } from '@yarnpkg/core'
import { ppath } from '@yarnpkg/fslib'
Expand All @@ -30,17 +30,19 @@ import normalizePackageData from 'normalize-package-data'
import type { PackageURL } from 'packageurl-js'

import { getBuildtimeInfo } from './_buildtimeInfo'
import { isString, tryRemoveSecretsFromUrl, trySanitizeGitUrl } from './_helpers'
import { getMimeForLicenseFile, isString, tryRemoveSecretsFromUrl, trySanitizeGitUrl } from './_helpers'
import { wsAnchoredPackage } from './_yarnCompat'
import { PropertyNames, PropertyValueBool } from './properties'

/** Loads the manifest data of a package. */
type ManifestFetcher = (pkg: Package) => Promise<any>
/** Yields the license evidences found for a package. */
type LicenseEvidenceFetcher = (pkg: Package) => AsyncGenerator<License>

/** Optional settings of {@link BomBuilder}; each mirrors the builder property of the same name. */
interface BomBuilderOptions {
  /** Whether to search for license files in components and include them as license evidence. */
  gatherLicenseTexts?: BomBuilder['gatherLicenseTexts']
  /** Component type of the meta component. */
  metaComponentType?: BomBuilder['metaComponentType']
  /** Whether to omit development dependencies. */
  omitDevDependencies?: BomBuilder['omitDevDependencies']
  /** Whether to make the output reproducible. */
  reproducible?: BomBuilder['reproducible']
  /** Whether to use shortened PURLs. */
  shortPURLs?: BomBuilder['shortPURLs']
}

export class BomBuilder {
Expand All @@ -52,6 +54,7 @@ export class BomBuilder {
metaComponentType: ComponentType
reproducible: boolean
shortPURLs: boolean
gatherLicenseTexts: boolean

console: Console

Expand All @@ -70,13 +73,15 @@ export class BomBuilder {
this.metaComponentType = options.metaComponentType ?? ComponentType.Application
this.reproducible = options.reproducible ?? false
this.shortPURLs = options.shortPURLs ?? false
this.gatherLicenseTexts = options.gatherLicenseTexts ?? false

this.console = console_
}

async buildFromWorkspace (workspace: Workspace): Promise<Bom> {
// @TODO make switch to disable load from fs
const fetchManifest: ManifestFetcher = await this.makeManifestFetcher(workspace.project)
const fetchLicenseEvidences: LicenseEvidenceFetcher = await this.makeLicenseEvidenceFetcher(workspace.project)

const setLicensesDeclared = function (license: License): void {
license.acknowledgement = LicenseAcknowledgement.Declared
Expand Down Expand Up @@ -119,7 +124,8 @@ export class BomBuilder {
}
for await (const component of this.gatherDependencies(
rootComponent, rootPackage,
workspace.project, fetchManifest
workspace.project,
fetchManifest, fetchLicenseEvidences
)) {
component.licenses.forEach(setLicensesDeclared)

Expand Down Expand Up @@ -163,33 +169,90 @@ export class BomBuilder {
}
}

/**
 * Case-insensitive pattern for names of license-related files:
 * names starting with `LICENSE`/`LICENCE`/`UNLICENSE`/`UNLICENCE`,
 * names ending with `.LICENSE`/`.LICENCE`, or exactly `NOTICE`.
 */
readonly #LICENSE_FILENAME_PATTERN = /^(?:UN)?LICEN[CS]E|.\.LICEN[CS]E$|^NOTICE$/i

/**
 * Create a function that fetches a package and yields one {@link NamedLicense}
 * per license file found in the package's root directory.
 * Each license carries the file's content as Base64-encoded attachment.
 *
 * @param project - the project whose configuration, cache and checksums are used for fetching
 * @returns an async generator function, yielding license evidences for a given package
 */
private async makeLicenseEvidenceFetcher (project: Project): Promise<LicenseEvidenceFetcher> {
  const fetcher = project.configuration.makeFetcher()
  const fetcherOptions: FetchOptions = {
    project,
    fetcher,
    cache: await Cache.find(project.configuration),
    checksums: project.storedChecksums,
    report: new ThrowReport(),
    cacheOptions: { skipIntegrityCheck: true }
  }
  // the returned generator is a plain function without access to `this` -- capture the pattern
  const LICENSE_FILENAME_PATTERN = this.#LICENSE_FILENAME_PATTERN
  return async function * (pkg: Package): AsyncGenerator<License> {
    const { packageFs, prefixPath, releaseFs } = await fetcher.fetch(pkg, fetcherOptions)
    try {
      // option `withFileTypes:true` is not supported and causes crashes
      const files = packageFs.readdirSync(prefixPath)
      for (const file of files) {
        if (!LICENSE_FILENAME_PATTERN.test(file)) {
          continue
        }

        const contentType = getMimeForLicenseFile(file)
        if (contentType === undefined) {
          continue
        }

        const fp = ppath.join(prefixPath, file)
        // The name pattern also matches directories (e.g. `licenses` or `LICENSE`).
        // Reading a directory as a file would throw, so only regular files are considered.
        if (!packageFs.statSync(fp).isFile()) {
          continue
        }

        yield new NamedLicense(
          `file: ${file}`,
          {
            text: new Attachment(
              packageFs.readFileSync(fp).toString('base64'),
              {
                contentType,
                encoding: AttachmentEncoding.Base64
              }
            )
          })
      }
    } finally {
      // always release the fetched package's resources, even if the consumer stops early
      if (releaseFs !== undefined) {
        releaseFs()
      }
    }
  }
}

private async makeComponentFromPackage (
pkg: Package,
fetchManifest: ManifestFetcher,
fetchLicenseEvidence: LicenseEvidenceFetcher,
type?: ComponentType | undefined
): Promise<Component | false | undefined> {
const data = await fetchManifest(pkg)
const manifest = await fetchManifest(pkg)
// the data in the manifest might be incomplete, so lets set the properties that yarn discovered and fixed
/* eslint-disable-next-line @typescript-eslint/strict-boolean-expressions */
data.name = pkg.scope ? `@${pkg.scope}/${pkg.name}` : pkg.name
data.version = pkg.version
return this.makeComponent(pkg, data, type)
manifest.name = pkg.scope ? `@${pkg.scope}/${pkg.name}` : pkg.name
manifest.version = pkg.version
const component = this.makeComponent(pkg, manifest, type)
if (this.gatherLicenseTexts && component instanceof Component) {
component.evidence = new ComponentEvidence()
for await (const le of fetchLicenseEvidence(pkg)) {
component.evidence.licenses.add(le)
}
}
return component
}

private makeComponent (locator: Locator, data: any, type?: ComponentType | undefined): Component | false | undefined {
private makeComponent (locator: Locator, manifest: any, type?: ComponentType | undefined): Component | false | undefined {
// work with a deep copy, because `normalizePackageData()` might modify the data
const dataC = structuredClonePolyfill(data)
normalizePackageData(dataC as normalizePackageData.Input)
const manifestC = structuredClonePolyfill(manifest)
normalizePackageData(manifestC as normalizePackageData.Input)
// region fix normalizations
if (isString(data.version)) {
if (isString(manifest.version)) {
// allow non-SemVer strings
dataC.version = data.version.trim()
manifestC.version = manifest.version.trim()
}
// endregion fix normalizations

// work with a deep copy, because `normalizePackageData()` might modify the data
const component = this.componentBuilder.makeComponent(
dataC as normalizePackageData.Package, type)
manifestC as normalizePackageData.Package, type)
if (component === undefined) {
this.console.debug('DEBUG | skip broken component: %j', locator)
return undefined
Expand Down Expand Up @@ -297,7 +360,8 @@ export class BomBuilder {
async * gatherDependencies (
component: Component, pkg: Package,
project: Project,
fetchManifest: ManifestFetcher
fetchManifest: ManifestFetcher,
fetchLicenseEvidences: LicenseEvidenceFetcher
): AsyncGenerator<Component> {
// ATTENTION: multiple packages may have the same `identHash`, but the `locatorHash` is unique.
const knownComponents = new Map<LocatorHash, Component>([[pkg.locatorHash, component]])
Expand All @@ -309,7 +373,8 @@ export class BomBuilder {
let depComponent = knownComponents.get(depPkg.locatorHash)
if (depComponent === undefined) {
const _depIDN = structUtils.prettyLocatorNoColors(depPkg)
const _depC = await this.makeComponentFromPackage(depPkg, fetchManifest)
const _depC = await this.makeComponentFromPackage(depPkg,
fetchManifest, fetchLicenseEvidences)
if (_depC === false) {
// shall be skipped
this.console.debug('DEBUG | skip impossible component %j', _depIDN)
Expand Down
8 changes: 7 additions & 1 deletion src/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ export class MakeSbomCommand extends Command<CommandContext> {
'This might result in loss of time- and random-based values.'
})

gatherLicenseTexts = Option.Boolean('--gather-license-texts', false, {
description: 'Search for license files in components and include them as license evidence.'
AugustusKling marked this conversation as resolved.
Show resolved Hide resolved
})

verbosity = Option.Counter('--verbose,-v', 1, {
description: 'Increase the verbosity of messages.\n' +
'Use multiple times to increase the verbosity even more.'
Expand Down Expand Up @@ -142,6 +146,7 @@ export class MakeSbomCommand extends Command<CommandContext> {
mcType: this.mcType,
shortPURLs: this.shortPURLs,
outputReproducible: this.outputReproducible,
gatherLicenseTexts: this.gatherLicenseTexts,
verbosity: this.verbosity,
projectDir
})
Expand Down Expand Up @@ -171,7 +176,8 @@ export class MakeSbomCommand extends Command<CommandContext> {
omitDevDependencies: this.production,
metaComponentType: this.mcType,
reproducible: this.outputReproducible,
shortPURLs: this.shortPURLs
shortPURLs: this.shortPURLs,
gatherLicenseTexts: this.gatherLicenseTexts
},
myConsole
)).buildFromWorkspace(workspace)
Expand Down
10 changes: 8 additions & 2 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,20 @@ Test files must follow the pattern `**.{spec,test}.[cm]?js`, to be picked up.
Test runner is `mocha`, configured in [mocharc file](../.mocharc.js).

```shell
npm test
yarn run test
```

To run specific tests only
```shell
yarn run test:node --grep "testname"
```

### Snapshots

Some tests check against snapshots.
To update these, set the env var `CYARN_TEST_UPDATE_SNAPSHOTS` to a non-falsy value.

like so:
```shell
CYARN_TEST_UPDATE_SNAPSHOTS=1 npm test
CYARN_TEST_UPDATE_SNAPSHOTS=1 yarn run test
```
Loading
Loading