Chore: Resolves #13866: Create a tool that lists dependencies of a package and its licenses (#13874)

This commit is contained in:
Henry Heino
2025-12-11 07:16:41 -08:00
committed by GitHub
parent 30c5031611
commit e2c933db82
7 changed files with 381 additions and 233 deletions

View File

@@ -1856,6 +1856,7 @@ packages/tools/generate-database-types.js
packages/tools/generate-images.js
packages/tools/git-changelog.test.js
packages/tools/git-changelog.js
packages/tools/licenses/buildReport.js
packages/tools/licenses/getLicenses.js
packages/tools/licenses/licenseChecker.js
packages/tools/licenses/licenseOverrides/fontAwesomeOverride/index.js

1
.gitignore vendored
View File

@@ -1828,6 +1828,7 @@ packages/tools/generate-database-types.js
packages/tools/generate-images.js
packages/tools/git-changelog.test.js
packages/tools/git-changelog.js
packages/tools/licenses/buildReport.js
packages/tools/licenses/getLicenses.js
packages/tools/licenses/licenseChecker.js
packages/tools/licenses/licenseOverrides/fontAwesomeOverride/index.js

View File

@@ -0,0 +1,288 @@
import getLicenses, { DependencyType, LicenseInfo } from './getLicenses';
import { readFile, readdir } from 'fs/promises';
import { dirname, join } from 'path';
import licenseOverrides from './licenseOverrides';
import { LicenseOverride } from './licenseOverrides/types';
import { exists } from 'fs-extra';
import matchApache2 from './utils/matchApache2';
import matchMit from './utils/matchMit';
// Important: Review the output of this tool for correctness
/**
 * License information for a single dependency, extended with the name of the
 * dependency it describes.
 */
interface PackageInfo extends LicenseInfo {
// The key under which this dependency was listed in the result of `getLicenses`.
packageName: string;
}
// Remembers the outcome of each GitHub fetch, keyed by `<repository>/<path>`.
// A `null` value records a fetch that failed, so that it is not retried.
const cachedFetchResults: Map<string, string|null> = new Map();

/**
 * Returns the content of the first file in `allowedPaths` that can be found for `pkg`.
 *
 * The package's local directory (`pkg.path`) is searched first. If none of the
 * allowed paths exist there, each path is requested from the package's GitHub
 * repository (at `HEAD`), in order. Fetch results, including failures, are cached.
 *
 * @param pkg The package to look up. `pkg.path` is joined with each allowed path.
 * @param allowedPaths Candidate file paths, relative to the package/repository root.
 * @returns The file content, or `null` if no candidate could be read or fetched.
 */
const readOrFetchRepositoryFile = async (pkg: PackageInfo, allowedPaths: string[]): Promise<string|null> => {
	// First pass: prefer a copy that is already on disk.
	for (const relativePath of allowedPaths) {
		const localPath = join(pkg.path, relativePath);
		if (!(await exists(localPath))) {
			continue;
		}
		return await readFile(localPath, 'utf8');
	}

	// Second pass: fall back to the repository on GitHub.
	for (const relativePath of allowedPaths) {
		const cacheKey = `${pkg.repository}/${relativePath}`;

		if (cachedFetchResults.has(cacheKey)) {
			const cachedContent = cachedFetchResults.get(cacheKey);
			if (cachedContent) {
				return cachedContent;
			}
			// A previous attempt failed (or was empty) -- try the next allowed path.
			continue;
		}

		// Accept both SSH-style and HTTPS-style GitHub repository URLs.
		const githubMatch =
			pkg.repository?.match(/^git@github\.com:([^/]+)\/([^.]+)(?:\.git)?$/)
			?? pkg.repository?.match(/^https:\/\/(?:www\.)?github\.com\/([^/]+)\/([^/]+)$/);
		if (!githubMatch) {
			console.warn('No repository for package', pkg.packageName);
			continue;
		}

		const [, organization, project] = githubMatch;
		console.error('Attempting to fetch', relativePath, 'for repository', pkg.repository, 'from GitHub...');
		const response = await fetch(`https://raw.githubusercontent.com/${organization}/${project}/HEAD/${relativePath}`);

		if (!response.ok) {
			cachedFetchResults.set(cacheKey, null);
			console.error(`Fetch failed: ${response.statusText}`);
			continue;
		}

		const content = await response.text();
		cachedFetchResults.set(cacheKey, content);
		console.error('Fetch success.');
		return content;
	}

	return null;
};
/**
 * Collects the content of all NOTICE-like files for `pkg` into a single string.
 *
 * Files in the package directory whose names end with `NOTICE`/`NOTICES`
 * (optionally followed by an extension, case-insensitive) are included, each
 * prefixed by its file name. If there are none and the package's license
 * identifier mentions "Apache", the notice is looked up with
 * `readOrFetchRepositoryFile` instead.
 *
 * @returns The combined notice text, or an empty string if `pkg` has no path.
 */
const getNotice = async (pkg: PackageInfo) => {
	// Some package overrides lack a `path`
	if (!pkg.path) {
		console.error('Not including notices for', pkg.packageName, ' -- no path.');
		return '';
	}

	const noticeFilePattern = /NOTICE(?:S)?(?:\.\w+)?$/i;
	const sections = [];
	for (const entryName of await readdir(pkg.path)) {
		if (!noticeFilePattern.test(entryName)) {
			continue;
		}
		const content = await readFile(join(pkg.path, entryName), 'utf8');
		sections.push(`${entryName}:\n\n${content}`);
	}

	// Nothing found locally: the repository may still contain a notice file
	// (this happens with some Amazon AWS packages).
	const isApacheLicensed = pkg.licenses.includes('Apache');
	if (sections.length === 0 && isApacheLicensed) {
		const repositoryNotice = await readOrFetchRepositoryFile(pkg, ['NOTICE', 'NOTICE.md']);
		sections.push(repositoryNotice);
	}

	return sections.join('\n\n');
};
/**
 * Removes everything that precedes the "License" section of a README-like document.
 *
 * Two header styles are recognized (case-insensitive):
 * - An ATX-style Markdown heading, e.g. `## License`.
 * - A Setext-style heading, i.e. a `License` line underlined with two or more `-`.
 *   In this case the returned text starts at the line break preceding `License`.
 *
 * Both `\n` and `\r\n` line endings are accepted, so READMEs saved with Windows
 * line endings are trimmed too.
 *
 * @param text The full document text.
 * @returns The text starting at the license header, or `text` unchanged if no
 *   header was found.
 */
const trimBeforeLicenseHeader = (text: string) => {
	const header =
		text.match(/#+ License\r?\n/i)
		?? text.match(/\r?\nLicense\r?\n--+\r?\n/i);

	if (!header) {
		return text;
	}
	return text.substring(header.index);
};
/**
 * Determines the license text for `pkg`.
 *
 * Sources, in order:
 * 1. `pkg.licenseText`, unless the license identifier contains `UNKNOWN`.
 * 2. If the package has a path and either no license file or a license file that
 *    is really a README, the result of looking up a LICENSE file with
 *    `readOrFetchRepositoryFile`. This replaces the value from step 1.
 * 3. If there is still no text, the content of `pkg.licenseFile` (trimmed to its
 *    license section when that file is a README).
 */
const readLicense = async (pkg: PackageInfo) => {
	let licenseText = '';
	const hasUsableLicenseText = pkg.licenseText && !pkg.licenses.includes('UNKNOWN');
	if (hasUsableLicenseText) {
		licenseText = pkg.licenseText;
	}

	const licenseFileIsReadme = pkg.licenseFile && pkg.licenseFile.match(/\/README(\.\w+)?$/);

	// license-checker-rseidelsohn falls back to the README when it can't find a
	// license file. That is often wrong, so look for a real license file first.
	const shouldSearchForLicenseFile = pkg.path && (!pkg.licenseFile || licenseFileIsReadme);
	if (shouldSearchForLicenseFile) {
		licenseText = await readOrFetchRepositoryFile(pkg, ['LICENSE', 'LICENSE.md', 'LICENSE.txt', 'MIT-LICENSE.txt']);
	}

	if (licenseText || !pkg.licenseFile) {
		return licenseText;
	}

	// Last resort: use whatever file the license checker pointed to.
	const fileContent = await readFile(pkg.licenseFile, 'utf8');
	return licenseFileIsReadme ? trimBeforeLicenseHeader(fileContent) : fileContent;
};
/**
 * Identifies which kind of license text was recognized for a report entry.
 * `buildReport` assigns the type from the license text itself: `Apache2` when
 * `matchApache2` matches, otherwise `Mit` when `matchMit` matches, otherwise `Other`.
 */
export enum EntryLicenseType {
Mit = 'MIT',
Apache2 = 'Apache2',
// Any license text that was not recognized as MIT or Apache 2.
Other = '',
}
/**
 * License data for a report entry, discriminated by `type`. Every variant
 * carries `notice`, the combined NOTICE text produced by `getNotice` (possibly empty).
 */
type LicenseDetails = {
type: EntryLicenseType.Apache2;
notice: string;
// Appendix found in the package's copy of the Apache 2 license.
// NOTE(review): `buildReport` stores `null` here when there is no appendix.
appendix: string;
} | {
type: EntryLicenseType.Mit;
notice: string;
// Copyright statement extracted from the MIT license text by `matchMit`.
copyright: string;
} | {
type: EntryLicenseType.Other;
notice: string;
// The complete license text, since it could not be reduced to a known license.
fullText: string;
};
/**
 * One row of the license report: a group of packages from the same repository
 * that share the same license information.
 */
interface ReportEntry {
// Names of the packages covered by this entry.
packageNames: string[];
license: LicenseDetails;
// License identifier(s) for the entry. When the license text is recognized as
// MIT or Apache 2 and the identifier does not already say so, `buildReport`
// adds `MIT`/`Apache-2` (joined with ` AND `).
licenseId: string;
// Repository the packages come from.
// NOTE(review): `buildReport` stores `null` here when the repository is unknown.
packageSource: string;
}
// The full report: the list of entries for each dependency type (production/development).
type Report = Record<DependencyType, ReportEntry[]>;
/**
 * Builds a license report for the dependencies of the given workspace packages.
 *
 * For each dependency type (production, then development), the licenses of all
 * dependencies of all requested packages are collected with `getLicenses`,
 * grouped by repository, and then split into entries. Consecutive packages from
 * the same repository share an entry as long as their license identifier,
 * notice and (for MIT licenses) copyright statement are the same.
 *
 * @param packageNames Directory names of packages under `packages/` in the
 *   monorepo root (three levels above this file's directory). License overrides
 *   registered for these names in `licenseOverrides` are applied.
 * @returns The report entries for each dependency type.
 * @throws If an entry would be written without license data (internal error).
 */
const buildReport = async (packageNames: string[]): Promise<Report> => {
	const monorepoRootDir = dirname(dirname(dirname(__dirname)));
	const packagesRootDir = join(monorepoRootDir, 'packages');

	const packageOverrides: LicenseOverride[] = [];
	for (const packageToCheck of packageNames) {
		if (licenseOverrides[packageToCheck]) {
			packageOverrides.push(...licenseOverrides[packageToCheck]);
		}
	}

	const report: Report = { [DependencyType.Production]: [], [DependencyType.Development]: [] };
	for (const mode of [DependencyType.Production, DependencyType.Development]) {
		const output: ReportEntry[] = [];
		report[mode] = output;

		// Merge the dependencies of all requested packages. A dependency that is
		// used by several packages is only listed once.
		const dependencyLicenses: Map<string, LicenseInfo> = new Map();
		for (const packageName of packageNames) {
			const packageDir = join(packagesRootDir, packageName);
			const packageLicenses = await getLicenses(packageDir, mode, [], packageOverrides);
			for (const dependencyName in packageLicenses) {
				dependencyLicenses.set(dependencyName, packageLicenses[dependencyName]);
			}
		}

		// Group into per-repository (some dependencies are broken up into multiple
		// packages)
		const repositoryToPackages: Map<string, PackageInfo[]> = new Map();
		for (const [packageName, packageData] of dependencyLicenses) {
			const packageInfo = {
				packageName,
				...packageData,
			};
			const repositoryData = repositoryToPackages.get(packageData.repository);
			if (repositoryData) {
				repositoryData.push(packageInfo);
			} else {
				repositoryToPackages.set(packageData.repository, [packageInfo]);
			}
		}

		for (const [repository, repositoryData] of repositoryToPackages) {
			// State of the entry that is currently being accumulated.
			let relevantPackages: string[] = [];
			let previousLicenseId: string|null = null;
			let lastLicenseData: LicenseDetails|null = null;

			// Writes the accumulated packages (if any) as a single entry and
			// starts a new, empty entry.
			const flushOutput = () => {
				if (relevantPackages.length > 0) {
					if (!lastLicenseData) {
						throw new Error('lastLicenseData is not set');
					}

					output.push({
						packageNames: [...relevantPackages],
						license: lastLicenseData,
						licenseId: previousLicenseId,
						packageSource: repository && repository !== 'null' ? repository : null,
					});
				}

				relevantPackages = [];
				lastLicenseData = null;
			};

			// Try to fetch LICENSE and NOTICE from node_modules
			for (const pkg of repositoryData) {
				const currentNotice = await getNotice(pkg);
				let currentLicenses = pkg.licenses;
				const licenseText = await readLicense(pkg) ?? 'NONE AVAILABLE';

				// Ensures that the license identifier mentions `identifier`, unless
				// it already contains something that matches `matcher`.
				const addOrSetLicense = (matcher: RegExp, identifier: string) => {
					if (!currentLicenses.match(matcher)) {
						// If the license ID was previously unknown, it has now been identified
						if (currentLicenses === 'UNKNOWN') {
							currentLicenses = '';
						}
						currentLicenses = [currentLicenses, identifier].filter(license => license.trim()).join(' AND ');
					}
				};

				// Determine the license data and additional information based on the full license text.
				// This allows extracting copyright and other information from the full license.
				let licenseData: LicenseDetails;
				const apache2Match = matchApache2(licenseText);
				if (apache2Match) {
					licenseData = {
						type: EntryLicenseType.Apache2,
						appendix: apache2Match.appendix || null,
						notice: currentNotice,
					};
					addOrSetLicense(/apache.?2/i, 'Apache-2');
				} else {
					const mitMatch = matchMit(licenseText);
					if (mitMatch) {
						licenseData = {
							type: EntryLicenseType.Mit,
							copyright: mitMatch.copyright,
							notice: currentNotice,
						};
						addOrSetLicense(/mit/i, 'MIT');
					} else {
						licenseData = {
							type: EntryLicenseType.Other,
							fullText: licenseText,
							notice: currentNotice,
						};
					}
				}

				const noticeChanged = lastLicenseData?.notice !== licenseData.notice;
				// Two MIT-licensed packages can have different copyright holders. These
				// must not share an entry, since an entry has a single copyright line.
				const copyrightChanged = lastLicenseData?.type === EntryLicenseType.Mit
					&& licenseData.type === EntryLicenseType.Mit
					&& lastLicenseData.copyright !== licenseData.copyright;

				if (previousLicenseId !== currentLicenses || noticeChanged || copyrightChanged) {
					flushOutput();
					previousLicenseId = currentLicenses;
					lastLicenseData = licenseData;
				}
				relevantPackages.push(pkg.packageName);
			}

			flushOutput();
		}
	}

	return report;
};
export default buildReport;

View File

@@ -58,7 +58,7 @@ const toLicensesMap = (info: licenseChecker.ModuleInfos, overrides: LicenseOverr
const getLicenses = async (
directory: string,
mode: DependencyType,
excludeLicenses: string[],
excludePackages: string[],
overrides: LicenseOverride[] = [],
) => {
@@ -68,7 +68,7 @@ const getLicenses = async (
licenseChecker.init({
start: directory,
excludeLicenses: excludeLicenses.join(','),
excludePackages: excludePackages.join(','),
excludePackagesStartingWith: '@joplin/',
development: isDevelopmentMode,
production: !isDevelopmentMode,

View File

@@ -42,12 +42,6 @@ const allPackageOverrides: LicenseOverride[] = [
// license field was not changed).
excludeDevelopment(/^fb-watchman/),
excludeDevelopment(/^bser/),
mitLicenseOverride(
'tkwidgets',
'https://github.com/laurent22/tkwidgets',
'2017-2018 Laurent Cozic',
/^tkwidgets[@]?.*$/,
),
];
const licenseOverrides: LicenseOverrides = {
@@ -92,6 +86,12 @@ const licenseOverrides: LicenseOverrides = {
],
'app-cli': [
...allPackageOverrides,
mitLicenseOverride(
'tkwidgets',
'https://github.com/laurent22/tkwidgets',
'2017-2018 Laurent Cozic',
/^tkwidgets[@]?.*$/,
),
],
};

View File

@@ -1,128 +1,17 @@
import getLicenses, { DependencyType, LicenseInfo } from './getLicenses';
import { readFile, readdir } from 'fs/promises';
import { dirname, join } from 'path';
import licenseOverrides from './licenseOverrides';
import { LicenseOverride } from './licenseOverrides/types';
import { exists, writeFile } from 'fs-extra';
import matchApache2 from './utils/matchApache2';
import { readFile } from 'fs/promises';
import { basename, dirname, join } from 'path';
import { writeFile } from 'fs-extra';
import apache2 from './licenseText/apache2';
import agplv3 from './licenseText/agplv3';
import matchMit from './utils/matchMit';
import mit from './licenseText/mit';
import buildReport, { EntryLicenseType } from './buildReport';
import yargs = require('yargs');
import { hideBin } from 'yargs/helpers';
import { DependencyType } from './getLicenses';
// Important: Review the output of this tool for correctness
interface PackageInfo extends LicenseInfo {
packageName: string;
}
const cachedFetchResults: Map<string, string|null> = new Map();
const readOrFetchRepositoryFile = async (pkg: PackageInfo, allowedPaths: string[]): Promise<string|null> => {
for (const path of allowedPaths) {
const targetPath = join(pkg.path, path);
if (await exists(targetPath)) {
const licenseText = await readFile(targetPath, 'utf8');
return licenseText;
}
}
for (const path of allowedPaths) {
const cacheKey = `${pkg.repository}/${path}`;
if (cachedFetchResults.has(cacheKey)) {
const cacheValue = cachedFetchResults.get(cacheKey);
if (cacheValue) {
return cacheValue;
}
// Otherwise, try the next allowed path
} else {
const repositoryMatch =
pkg.repository?.match(/^git@github\.com:([^/]+)\/([^.]+)(?:\.git)?$/)
?? pkg.repository?.match(/^https:\/\/(?:www\.)?github\.com\/([^/]+)\/([^/]+)$/);
if (repositoryMatch) {
const organization = repositoryMatch[1];
const project = repositoryMatch[2];
console.info('Attempting to fetch', path, 'for repository', pkg.repository, 'from GitHub...');
const noticeResult = await fetch(`https://raw.githubusercontent.com/${organization}/${project}/HEAD/${path}`);
if (noticeResult.ok) {
const result = await noticeResult.text();
cachedFetchResults.set(cacheKey, result);
console.error('Fetch success.');
return result;
} else {
cachedFetchResults.set(cacheKey, null);
console.error(`Fetch failed: ${noticeResult.statusText}`);
}
} else {
console.warn('No repository for package', pkg.packageName);
}
}
}
return null;
};
const getNotice = async (pkg: PackageInfo) => {
// Some package overrides lack a `path`
if (!pkg.path) {
console.log('Not including notices for', pkg.packageName, ' -- no path.');
return '';
}
const files = await readdir(pkg.path);
const noticeLines = [];
for (const fileName of files) {
if (/NOTICE(?:S)?(?:\.\w+)?$/i.exec(fileName)) {
const noticeContent = await readFile(join(pkg.path, fileName), 'utf8');
noticeLines.push(`${fileName}:\n\n${noticeContent}`);
}
}
// If no notices were found, there may still be such a file in the package's repository
// (as is the case for some Amazon AWS packages).
if (noticeLines.length === 0 && pkg.licenses.includes('Apache')) {
noticeLines.push(await readOrFetchRepositoryFile(pkg, ['NOTICE', 'NOTICE.md']));
}
return noticeLines.join('\n\n');
};
const trimBeforeLicenseHeader = (text: string) => {
const header = text.match(/#+ License[\n]/i);
if (header) {
return text.substring(header.index);
} else {
return text;
}
};
const readLicense = async (pkg: PackageInfo) => {
let result = '';
if (pkg.licenseText && !pkg.licenses.includes('UNKNOWN')) {
result = pkg.licenseText;
}
const resolvedLicenseToReadme = pkg.licenseFile && pkg.licenseFile.match(/\/README(\.\w+)?$/);
// By default, license-checker-rseidelsohn uses the README when the license can't be
// found. This is often wrong, and we can do better:
if (pkg.path && (!pkg.licenseFile || resolvedLicenseToReadme)) {
result = await readOrFetchRepositoryFile(pkg, ['LICENSE', 'LICENSE.md', 'LICENSE.txt', 'MIT-LICENSE.txt']);
}
if (!result && pkg.licenseFile) {
result = await readFile(pkg.licenseFile, 'utf8');
if (resolvedLicenseToReadme) {
result = trimBeforeLicenseHeader(result);
}
}
return result;
};
const toCodeBlock = (content: string) => {
const internalFences = [...content.matchAll(/(^|\n)[`]+/g)];
const longestFence = internalFences
@@ -130,15 +19,12 @@ const toCodeBlock = (content: string) => {
return `${longestFence}\`\n${content}\n${longestFence}\``;
};
const buildForPackage = async (packageNames: string[]): Promise<string> => {
const buildFullReport = async (packageNames: string[]): Promise<string> => {
const monorepoRootDir = dirname(dirname(dirname(__dirname)));
const packagesRootDir = join(monorepoRootDir, 'packages');
const output: string[] = [];
// Identifiers for licenses that can be excluded.
const excludeLicenses: string[] = [];
output.push('# License information');
output.push('This file summarizes the licenses of Joplin and the direct and indirect dependencies of packages in the Joplin monorepo. Many of these dependencies are used only at build time.');
@@ -153,99 +39,33 @@ const buildForPackage = async (packageNames: string[]): Promise<string> => {
output.push('This license applies only to **Joplin Server** -- the files in the `packages/server` directory.');
output.push(toCodeBlock(await readFile(join(packagesRootDir, 'server', 'LICENSE.md'), 'utf-8')));
const packageOverrides: LicenseOverride[] = [];
for (const packageToCheck of packageNames) {
if (licenseOverrides[packageToCheck]) {
packageOverrides.push(...licenseOverrides[packageToCheck]);
}
}
const report = await buildReport(packageNames);
for (const mode of [DependencyType.Production, DependencyType.Development]) {
for (const [mode, entries] of Object.entries(report)) {
output.push('', `## ${mode} dependencies`, '');
output.push(`Dependencies in this section are listed as "${mode}" dependencies in one of the \`package.json\` files of a Joplin package, or a dependency of a Joplin package.`);
const dependencyLicenses: Record<string, LicenseInfo> = {};
for (const packageName of packageNames) {
const packageDir = join(packagesRootDir, packageName);
const packageLicenses = await getLicenses(packageDir, mode, excludeLicenses, packageOverrides);
for (const packageName in packageLicenses) {
dependencyLicenses[packageName] = packageLicenses[packageName];
for (const entry of entries) {
let licenseShortText = '';
if (entry.license.type === EntryLicenseType.Mit) {
licenseShortText = `${toCodeBlock(entry.license.copyright)}\n\nSee [Appendix B](#appendix-b-the-mit-license) for the full MIT license.`;
} else if (entry.license.type === EntryLicenseType.Apache2) {
licenseShortText = `${
entry.license.appendix ?
`APPENDIX: This package's copy of the Apache 2 license includes the following appendix:\n${toCodeBlock(entry.license.appendix)}\n\n`
: ''
}See [Appendix A](#appendix-a-the-apache-2-license) for the Apache 2 license.`;
} else {
licenseShortText = toCodeBlock(entry.license.fullText);
}
}
// Group into per-repository (some dependencies are broken up into multiple
// packages)
const repositoryToPackages: Record<string, PackageInfo[]> = Object.create(null);
for (const packageName in dependencyLicenses) {
const packageData = dependencyLicenses[packageName];
repositoryToPackages[packageData.repository] ??= [];
repositoryToPackages[packageData.repository].push({
packageName,
...packageData,
});
}
for (const repository in repositoryToPackages) {
let repositoryOutput: string[] = [];
let relevantPackages: string[] = [];
const flushOutput = () => {
if (relevantPackages.length > 0 || repositoryOutput.length > 0) {
output.push(`### ${relevantPackages.join(', ')}`);
output.push(repository && repository !== 'null' ? `From ${repository}.` : '');
output.push(...repositoryOutput);
}
relevantPackages = [];
repositoryOutput = [];
};
// Try to fetch LICENSE and NOTICE from node_modules
let previousLicense: string|null = null;
let previousNotice: string|null = null;
for (const pkg of repositoryToPackages[repository]) {
const currentNotice = await getNotice(pkg);
if (previousLicense !== pkg.licenses || previousNotice !== currentNotice) {
flushOutput();
repositoryOutput.push(`**${pkg.licenses}**:`);
const licenseText = await readLicense(pkg) ?? 'NONE AVAILABLE';
// The Apache2 license is both long and common. Extracting it to an Appendix can significantly
// decrease the size of the license statement.
const apache2Match = matchApache2(licenseText);
if (apache2Match) {
repositoryOutput.push('See [Appendix A](#appendix-a-the-apache-2-license) for the Apache 2 license.');
if (
apache2Match.appendix) {
repositoryOutput.push(
'This package\'s copy of the Apache 2 license includes the following appendix:',
toCodeBlock(apache2Match.appendix),
);
}
} else {
const mitMatch = matchMit(licenseText);
if (mitMatch) {
repositoryOutput.push(`Copyright: ${mitMatch.copyright}`);
repositoryOutput.push('See [Appendix B](#appendix-b-the-mit-license) for the full MIT license.');
} else {
repositoryOutput.push(toCodeBlock(licenseText));
}
}
if (currentNotice) {
repositoryOutput.push('**NOTICE**:', toCodeBlock(currentNotice));
}
previousLicense = pkg.licenses;
previousNotice = currentNotice;
}
relevantPackages.push(pkg.packageName);
}
flushOutput();
output.push(...[
`### ${entry.packageNames.join(', ')}`,
entry.packageSource ? `From ${entry.packageSource}.` : '',
`**${entry.licenseId}**:`,
licenseShortText,
entry.license.notice ? `**NOTICE**:\n${toCodeBlock(entry.license.notice)}` : '',
].filter(line => !!line.trim()));
}
}
@@ -258,20 +78,58 @@ const buildForPackage = async (packageNames: string[]): Promise<string> => {
return output.join('\n\n');
};
const licenseStatementBuilder = async () => {
const baseDir = dirname(dirname(dirname(__dirname)));
const outputPath = join(baseDir, 'readme', 'licenses.md');
const result = await buildForPackage([
'app-mobile',
'app-cli',
'app-desktop',
'server',
]);
console.log('Writing...');
await writeFile(outputPath, result, 'utf-8');
const csvQuote = (column: string) => {
// See https://en.wikipedia.org/wiki/Comma-separated_values#Specification
return `"${column.replace(/"/g, '""')}"`;
};
licenseStatementBuilder().catch(error => {
console.error('Error', error);
process.exit(1);
});
void yargs()
.scriptName(basename(__filename))
.strict()
.demandCommand()
.usage('$0 <cmd>')
.command(
'update-report',
'Rebuilds the license list file.',
async () => {
const baseDir = dirname(dirname(dirname(__dirname)));
const outputPath = join(baseDir, 'readme', 'licenses.md');
const result = await buildFullReport([
'app-mobile',
'app-cli',
'app-desktop',
'server',
]);
console.log(`Writing to ${outputPath}...`);
await writeFile(outputPath, result, 'utf-8');
},
)
.command(
'report <name>',
'Generates a CSV report for a given package in the Joplin workspace',
(yargs) => {
return yargs.options({
name: {
type: 'string',
describe: 'The name of the package (in the Joplin workspace) for which to generate the report.',
},
});
},
async (argv) => {
const report = await buildReport([argv.name]);
const csv = [['Development dependency?', 'Packages', 'License ID', 'Has notice?'].join(',')];
for (const [type, entries] of Object.entries(report)) {
for (const entry of entries) {
csv.push([
type === DependencyType.Development,
csvQuote(entry.packageNames.join(',')),
entry.licenseId,
!!entry.license.notice,
].join(','));
}
}
console.log(csv.join('\n'));
},
)
.help()
.parse(hideBin(process.argv));

View File

@@ -10,7 +10,7 @@
"watch": "tsc --watch --preserveWatchOutput --project tsconfig.json",
"test": "jest --verbose=false",
"test-ci": "yarn test",
"generate-license-statement": "ts-node licenses/licenseStatementBuilder.ts --pkg"
"generate-license-statement": "ts-node licenses/licenseStatementBuilder.ts"
},
"publishConfig": {
"access": "public"