From d97a86635250136f1e16225e5e6001fa048fbc7b Mon Sep 17 00:00:00 2001 From: SukkaW Date: Tue, 21 Jan 2025 14:14:27 +0800 Subject: [PATCH 1/6] Update CDN Hosts --- Source/domainset/cdn.conf | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Source/domainset/cdn.conf b/Source/domainset/cdn.conf index a6b77adae..62687df3d 100644 --- a/Source/domainset/cdn.conf +++ b/Source/domainset/cdn.conf @@ -43,6 +43,7 @@ ajax.cdnjs.com .gitcdn.link .combinatronics.io .bootstrapcdn.com +cdn.staticdelivr.com # TODO remove this in the future twemoji.maxcdn.com emoji-css.afeld.me @@ -678,6 +679,7 @@ avatars.hubspot.net .f.hubspotfree.net .hubspotusercontent-eu1.net .hubspotusercontent-na1.net +.hubspotlinks.com # you said no cache, I said go secondary policy, you are using S3 anyway no-cache.hubspot.com @@ -1032,6 +1034,7 @@ vetted.ai cdn.jetboost.io .klarnacdn.net js.klarn.com +js.klarna.com na-library.klarnaservices.com cdn.onesignal.com cdn.privacy-mgmt.com @@ -1170,6 +1173,20 @@ js.upscope.io sdk.customfit.ai .uicdn.net widget.docsbot.ai +cdn.quickemailverification.com + +checkout-sdk.bigcommerce.com +cdn1.bigcommerce.com +cdn2.bigcommerce.com +cdn3.bigcommerce.com +cdn4.bigcommerce.com +cdn5.bigcommerce.com +cdn6.bigcommerce.com +cdn7.bigcommerce.com +cdn8.bigcommerce.com +cdn9.bigcommerce.com +cdn10.bigcommerce.com +cdn11.bigcommerce.com js.chargebee.com js1.chargebee.com @@ -3163,6 +3180,8 @@ consumersite-assets.trustpilot.net simplicity.trustpilot.com user-images.trustpilot.com widget.trustpilot.com +ecommplugins-scripts.trustpilot.com +ecommplugins-trustboxsettings.trustpilot.com a.fsdn.com s1.qwant.com s2.qwant.com @@ -3978,3 +3997,5 @@ img.coomer.su statics.erothots.co statics.simpshub.com .static.mega.co.nz +cdn.workos.com +frontend-apps.workos.com From 07419a79428cf2358e4a0829a06df8a6e4e7762d Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 22 Jan 2025 10:52:03 +0800 Subject: [PATCH 2/6] Perf: faster `fetchAssets` (without string and manual split) --- Build/build-reject-ip-list.ts | 14 +++++--------- Build/lib/fetch-assets.ts | 16 ++++++++++++---- Build/lib/parse-filter/domainlists.ts | 16 ++++++---------- Build/lib/parse-filter/filters.ts | 4 +--- Build/lib/parse-filter/hosts.ts | 18 ++++-------------- 5 files changed, 28 insertions(+), 40 deletions(-) diff --git a/Build/build-reject-ip-list.ts b/Build/build-reject-ip-list.ts index 14b8ea172..9be65f330 100644 --- a/Build/build-reject-ip-list.ts +++ b/Build/build-reject-ip-list.ts @@ -4,7 +4,6 @@ import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from import { task } from './trace'; import { SHARED_DESCRIPTION } from './constants/description'; import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip'; -import { processLine } from './lib/process-line'; import { RulesetOutput } from './lib/create-file'; import { SOURCE_DIR } from './constants/dir'; import { $$fetch } from './lib/fetch-retry'; @@ -37,14 +36,11 @@ const BOTNET_FILTER_MIRROR_URL = [ // https://curbengh.github.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt ]; -const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL).then(text => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => { - const ip = processLine(cur); - if (ip) { - if (isProbablyIpv4(ip)) { - acc[0].push(ip); - } else if (isProbablyIpv6(ip)) { - acc[1].push(ip); - } +const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL, true).then(arr => arr.reduce<[ipv4: string[], ipv6: string[]]>((acc, ip) => { + if (isProbablyIpv4(ip)) { + acc[0].push(ip); + } else if (isProbablyIpv6(ip)) { + acc[1].push(ip); } return acc; }, [[], []])); diff --git a/Build/lib/fetch-assets.ts b/Build/lib/fetch-assets.ts index 295e293d5..633104806 100644 --- a/Build/lib/fetch-assets.ts +++ b/Build/lib/fetch-assets.ts @@ -1,6 +1,9 @@ import picocolors from 'picocolors'; import { $$fetch, defaultRequestInit, ResponseError } from './fetch-retry'; import { waitWithAbort } from 'foxts/wait'; +import { nullthrow } from 'foxts/guard'; +import { TextLineStream } from './text-line-transform-stream'; +import { ProcessLineStream } from './process-line'; // eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better export class CustomAbortError extends Error { @@ -26,7 +29,7 @@ export class CustomNoETagFallbackError extends Error { } } -export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[]) { +export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[], processLine = false) { const controller = new AbortController(); const createFetchFallbackPromise = async (url: string, index: number) => { @@ -44,14 +47,19 @@ export async function fetchAssets(url: string, fallbackUrls: null | undefined | throw new CustomAbortError(); } const res = await $$fetch(url, { signal: controller.signal, ...defaultRequestInit }); - const text = await res.text(); - if (text.length < 2) { + let stream = nullthrow(res.body).pipeThrough(new TextDecoderStream()).pipeThrough(new TextLineStream()); + if (processLine) { + stream = stream.pipeThrough(new ProcessLineStream()); + } + const arr = await Array.fromAsync(stream); + + if (arr.length < 1) { throw new ResponseError(res, url, 'empty response w/o 304'); } controller.abort(); - return text; + return arr; }; if (!fallbackUrls || fallbackUrls.length === 0) { diff --git a/Build/lib/parse-filter/domainlists.ts b/Build/lib/parse-filter/domainlists.ts index 63df41073..834c6c1b0 100644 --- a/Build/lib/parse-filter/domainlists.ts +++ b/Build/lib/parse-filter/domainlists.ts @@ -16,10 +16,7 @@ function domainListLineCb(l: string, set: string[], meta: string, normalizeDomai set.push(domain); } -function domainListLineCbIncludeAllSubdomain(l: string, set: string[], meta: string, normalizeDomain = fastNormalizeDomain) { - const line = processLine(l); - if (!line) return; - +function domainListLineCbIncludeAllSubdomain(line: string, set: string[], meta: string, normalizeDomain = fastNormalizeDomain) { const domain = normalizeDomain(line); if (!domain) return; @@ -36,12 +33,12 @@ export function processDomainLists( const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb; return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => { - const text = await span.traceChildAsync('download', () => fetchAssets( + const filterRules = await span.traceChildAsync('download', () => fetchAssets( domainListsUrl, - mirrors + mirrors, + true )); const domainSets: string[] = []; - const filterRules = text.split('\n'); span.traceChildSync('parse domain list', () => { for (let i = 0, len = filterRules.length; i < len; i++) { @@ -59,13 +56,12 @@ export function processDomainListsWithPreload( ) { const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain; - const downloadPromise = fetchAssets(domainListsUrl, mirrors); + const downloadPromise = fetchAssets(domainListsUrl, mirrors, true); const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb; return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => { - const text = await span.traceChildPromise('download', downloadPromise); + const filterRules = await span.traceChildPromise('download', downloadPromise); const domainSets: string[] = []; - const filterRules = text.split('\n'); span.traceChildSync('parse domain list', () => { for (let i = 0, len = filterRules.length; i < len; i++) { diff --git a/Build/lib/parse-filter/filters.ts b/Build/lib/parse-filter/filters.ts index dfebf7a98..2e2761540 100644 --- a/Build/lib/parse-filter/filters.ts +++ b/Build/lib/parse-filter/filters.ts @@ -28,7 +28,7 @@ export function processFilterRulesWithPreload( const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls); return (span: Span) => span.traceChildAsync>(`process filter rules: ${filterRulesUrl}`, async (span) => { - const text = await span.traceChildPromise('download', downloadPromise); + const filterRules = await span.traceChildPromise('download', downloadPromise); const whiteDomains = new Set(); const whiteDomainSuffixes = new Set(); @@ -82,8 +82,6 @@ export function processFilterRulesWithPreload( } }; - const filterRules = text.split('\n'); - span.traceChild('parse adguard filter').traceSyncFn(() => { for (let i = 0, len = filterRules.length; i < len; i++) { lineCb(filterRules[i]); diff --git a/Build/lib/parse-filter/hosts.ts b/Build/lib/parse-filter/hosts.ts index db4dd2ae0..865c8f870 100644 --- a/Build/lib/parse-filter/hosts.ts +++ b/Build/lib/parse-filter/hosts.ts @@ -1,15 +1,9 @@ import type { Span } from '../../trace'; import { fetchAssets } from '../fetch-assets'; import { fastNormalizeDomain } from '../normalize-domain'; -import { processLine } from '../process-line'; import { onBlackFound } from './shared'; -function hostsLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) { - const line = processLine(l); - if (!line) { - return; - } - +function hostsLineCb(line: string, set: string[], includeAllSubDomain: boolean, meta: string) { const _domain = line.split(/\s/)[1]?.trim(); if (!_domain) { return; @@ -29,12 +23,10 @@ export function processHosts( hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false ) { return span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => { - const text = await span.traceChild('download').traceAsyncFn(() => fetchAssets(hostsUrl, mirrors)); + const filterRules = await span.traceChild('download').traceAsyncFn(() => fetchAssets(hostsUrl, mirrors, true)); const domainSets: string[] = []; - const filterRules = text.split('\n'); - span.traceChild('parse hosts').traceSyncFn(() => { for (let i = 0, len = filterRules.length; i < len; i++) { hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl); @@ -46,15 +38,13 @@ export function processHosts( } export function processHostsWithPreload(hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) { - const downloadPromise = fetchAssets(hostsUrl, mirrors); + const downloadPromise = fetchAssets(hostsUrl, mirrors, true); return (span: Span) => span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => { - const text = await span.traceChild('download').tracePromise(downloadPromise); + const filterRules = await span.traceChild('download').tracePromise(downloadPromise); const domainSets: string[] = []; - const filterRules = text.split('\n'); - span.traceChild('parse hosts').traceSyncFn(() => { for (let i = 0, len = filterRules.length; i < len; i++) { hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl); From e19d7989c3dc176256fd370bde6012b4515288cf Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 22 Jan 2025 20:04:41 +0800 Subject: [PATCH 3/6] Perf/Refactor: faster ip version --- Build/build-common.ts | 2 +- Build/build-reject-ip-list.ts | 12 +++++++----- Build/build-telegram-cidr.ts | 9 ++++----- Build/lib/cache-filesystem.ts | 2 +- Build/lib/misc.ts | 4 ++++ Build/lib/rules/ruleset.ts | 32 ++++++++++++++++++++------------ 6 files changed, 37 insertions(+), 24 deletions(-) diff --git a/Build/build-common.ts b/Build/build-common.ts index b982e777c..277f06b45 100644 --- a/Build/build-common.ts +++ b/Build/build-common.ts @@ -147,7 +147,7 @@ async function transformRuleset(parentSpan: Span, sourcePath: string, relativePa if (res === $skip) return; const id = basename; - const type = relativePath.slice(0, -extname.length).split(path.sep)[0]; + const type = relativePath.split(path.sep)[0]; if (type !== 'ip' && type !== 'non_ip') { throw new TypeError(`Invalid type: ${type}`); diff --git a/Build/build-reject-ip-list.ts b/Build/build-reject-ip-list.ts index 9be65f330..acb830478 100644 --- a/Build/build-reject-ip-list.ts +++ b/Build/build-reject-ip-list.ts @@ -3,11 +3,11 @@ import path from 'node:path'; import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { task } from './trace'; import { SHARED_DESCRIPTION } from './constants/description'; -import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip'; import { RulesetOutput } from './lib/create-file'; import { SOURCE_DIR } from './constants/dir'; import { $$fetch } from './lib/fetch-retry'; import { fetchAssets } from './lib/fetch-assets'; +import { fastIpVersion } from './lib/misc'; const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf'; const getBogusNxDomainIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = $$fetch(BOGUS_NXDOMAIN_URL).then(async (resp) => { @@ -17,9 +17,10 @@ const getBogusNxDomainIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = $$ for await (const line of createReadlineInterfaceFromResponse(resp, true)) { if (line.startsWith('bogus-nxdomain=')) { const ip = line.slice(15).trim(); - if (isProbablyIpv4(ip)) { + const v = fastIpVersion(ip); + if (v === 4) { ipv4.push(ip); - } else if (isProbablyIpv6(ip)) { + } else if (v === 6) { ipv6.push(ip); } } @@ -37,9 +38,10 @@ const BOTNET_FILTER_MIRROR_URL = [ ]; const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL, true).then(arr => arr.reduce<[ipv4: string[], ipv6: string[]]>((acc, ip) => { - if (isProbablyIpv4(ip)) { + const v = fastIpVersion(ip); + if (v === 4) { acc[0].push(ip); - } else if (isProbablyIpv6(ip)) { + } else if (v === 6) { acc[1].push(ip); } return acc; diff --git a/Build/build-telegram-cidr.ts b/Build/build-telegram-cidr.ts index 4f10f9e60..7dbb561d1 100644 --- a/Build/build-telegram-cidr.ts +++ b/Build/build-telegram-cidr.ts @@ -1,11 +1,11 @@ // @ts-check import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line'; -import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip'; import { task } from './trace'; import { SHARED_DESCRIPTION } from './constants/description'; import { createMemoizedPromise } from './lib/memo-promise'; import { RulesetOutput } from './lib/create-file'; import { $$fetch } from './lib/fetch-retry'; +import { fastIpVersion } from './lib/misc'; export const getTelegramCIDRPromise = createMemoizedPromise(async () => { const resp = await $$fetch('https://core.telegram.org/resources/cidr.txt'); @@ -20,11 +20,10 @@ export const getTelegramCIDRPromise = createMemoizedPromise(async () => { const ipcidr6: string[] = []; for await (const cidr of createReadlineInterfaceFromResponse(resp, true)) { - const [subnet] = cidr.split('/'); - if (isProbablyIpv4(subnet)) { + const v = fastIpVersion(cidr); + if (v === 4) { ipcidr.push(cidr); - } - if (isProbablyIpv6(subnet)) { + } else if (v === 6) { ipcidr6.push(cidr); } } diff --git a/Build/lib/cache-filesystem.ts b/Build/lib/cache-filesystem.ts index 181879e88..7d348215c 100644 --- a/Build/lib/cache-filesystem.ts +++ b/Build/lib/cache-filesystem.ts @@ -199,7 +199,7 @@ export const deserializeSet = (str: string) => new Set(str.split(separator)); export const serializeArray = (arr: string[]) => fastStringArrayJoin(arr, separator); export const deserializeArray = (str: string) => str.split(separator); -export const getFileContentHash = (filename: string) => simpleStringHash(fs.readFileSync(filename, 'utf-8')); +const getFileContentHash = (filename: string) => simpleStringHash(fs.readFileSync(filename, 'utf-8')); export function createCacheKey(filename: string) { const fileHash = getFileContentHash(filename); return (key: string) => key + '$' + fileHash + '$'; diff --git a/Build/lib/misc.ts b/Build/lib/misc.ts index 422a65bcb..e2b13ee3b 100644 --- a/Build/lib/misc.ts +++ b/Build/lib/misc.ts @@ -69,3 +69,7 @@ export function isDirectoryEmptySync(path: PathLike) { directoryHandle.closeSync(); } } + +export function fastIpVersion(ip: string) { + return ip.includes(':') ? 6 : (ip.includes('.') ? 4 : 0); +} diff --git a/Build/lib/rules/ruleset.ts b/Build/lib/rules/ruleset.ts index c45e1cf51..9e8a57765 100644 --- a/Build/lib/rules/ruleset.ts +++ b/Build/lib/rules/ruleset.ts @@ -7,7 +7,8 @@ import type { SingboxSourceFormat } from '../singbox'; import { RuleOutput } from './base'; import picocolors from 'picocolors'; import { normalizeDomain } from '../normalize-domain'; -import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip'; +import { isProbablyIpv4 } from 'foxts/is-probably-ip'; +import { fastIpVersion } from '../misc'; type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]]; @@ -93,10 +94,11 @@ export class RulesetOutput extends RuleOutput { if (value.includes('/')) { return `SRC-IP-CIDR,${value}`; } - if (isProbablyIpv4(value)) { + const v = fastIpVersion(value); + if (v === 4) { return `SRC-IP-CIDR,${value}/32`; } - if (isProbablyIpv6(value)) { + if (v === 6) { return `SRC-IP-CIDR6,${value}/128`; } return ''; @@ -148,10 +150,14 @@ export class RulesetOutput extends RuleOutput { source_ip_cidr: [...this.sourceIpOrCidr].reduce((acc, cur) => { if (cur.includes('/')) { acc.push(cur); - } else if (isProbablyIpv4(cur)) { - acc.push(cur + '/32'); - } else if (isProbablyIpv6(cur)) { - acc.push(cur + '/128'); + } else { + const v = fastIpVersion(cur); + + if (v === 4) { + acc.push(cur + '/32'); + } else if (v === 6) { + acc.push(cur + '/128'); + } } return acc; @@ -245,11 +251,13 @@ export class RulesetOutput extends RuleOutput { for (const i of urlRegexResults) { for (const processed of i.processed) { - if (normalizeDomain( - processed - .replaceAll('*', 'a') - .replaceAll('?', 'b') - )) { + if ( + normalizeDomain( + processed + .replaceAll('*', 'a') + .replaceAll('?', 'b') + ) + ) { parsed.push([i.origin, processed]); } else if (!isProbablyIpv4(processed)) { parsedFailures.push([i.origin, processed]); From a5a1a82d0014bf079e5ce522b1dd2ef5dfdf050b Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 22 Jan 2025 20:04:44 +0800 Subject: [PATCH 4/6] Update CDN & Global Hosts --- Source/domainset/cdn.conf | 1 + Source/non_ip/global.conf | 1 + 2 files changed, 2 insertions(+) diff --git a/Source/domainset/cdn.conf b/Source/domainset/cdn.conf index 62687df3d..857fedbc2 100644 --- a/Source/domainset/cdn.conf +++ b/Source/domainset/cdn.conf @@ -114,6 +114,7 @@ eslint.style .stylexjs.com pytorch.org nextra.site +.rescript-lang.org # JS.ORG .js.org diff --git a/Source/non_ip/global.conf b/Source/non_ip/global.conf index 6eb0456e3..073641c4b 100644 --- a/Source/non_ip/global.conf +++ b/Source/non_ip/global.conf @@ -536,6 +536,7 @@ DOMAIN-SUFFIX,bitbucket.io DOMAIN-SUFFIX,bit.ly DOMAIN-SUFFIX,bitly.com DOMAIN-SUFFIX,bitmex.com +DOMAIN-SUFFIX,bleepingcomputer.com DOMAIN-SUFFIX,blogimg.jp DOMAIN-SUFFIX,bloomberg.com DOMAIN-SUFFIX,bloomberg.net From 623e45bba66cf484c6758a2b9edbf9a6ef90183c Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 22 Jan 2025 21:18:52 +0800 Subject: [PATCH 5/6] Update Reject Hosts --- Source/domainset/reject_sukka.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/domainset/reject_sukka.conf b/Source/domainset/reject_sukka.conf index 96c786ce4..b517240f7 100644 --- a/Source/domainset/reject_sukka.conf +++ b/Source/domainset/reject_sukka.conf @@ -829,6 +829,7 @@ simple.cloudsmith.com .carpe.pages.dev datum.jsdelivr.com tracking.yorg.app +.demo.medama.io .taginstall.com .tealiumiq.com stats.wp.com From 3c2b49df763962bd70f123664615142482e3874b Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 22 Jan 2025 20:49:56 +0800 Subject: [PATCH 6/6] Chore: minor changes --- Build/build-common.ts | 77 ++++++++------------------- Build/lib/fetch-assets.ts | 22 ++------ Build/lib/fetch-retry.ts | 42 +++++++-------- Build/lib/fetch-text-by-line.bench.ts | 3 +- Build/lib/fetch-text-by-line.ts | 40 ++++---------- Build/lib/parse-dnsmasq.ts | 12 +---- package.json | 1 - pnpm-lock.yaml | 54 ------------------- 8 files changed, 57 insertions(+), 194 deletions(-) diff --git a/Build/build-common.ts b/Build/build-common.ts index 277f06b45..5cffc3cb1 100644 --- a/Build/build-common.ts +++ b/Build/build-common.ts @@ -16,8 +16,6 @@ const MAGIC_COMMAND_TITLE = '# $ meta_title '; const MAGIC_COMMAND_DESCRIPTION = '# $ meta_description '; const MAGIC_COMMAND_SGMODULE_MITM_HOSTNAMES = '# $ sgmodule_mitm_hostnames '; -const domainsetSrcFolder = 'domainset' + path.sep; - const clawSourceDirPromise = new Fdir() .withRelativePaths() .filter((filepath, isDirectory) => { @@ -39,15 +37,11 @@ export const buildCommon = task(require.main === module, __filename)(async (span const relativePath = paths[i]; const fullPath = SOURCE_DIR + path.sep + relativePath; - if (relativePath.startsWith(domainsetSrcFolder)) { - promises.push(transformDomainset(span, fullPath)); - continue; - } // if ( // relativePath.startsWith('ip/') // || relativePath.startsWith('non_ip/') // ) { - promises.push(transformRuleset(span, fullPath, relativePath)); + promises.push(transform(span, fullPath, relativePath)); // continue; // } @@ -102,71 +96,44 @@ function processFile(span: Span, sourcePath: string) { }); } -function transformDomainset(parentSpan: Span, sourcePath: string) { - const extname = path.extname(sourcePath); - const basename = path.basename(sourcePath, extname); - return parentSpan - .traceChildAsync( - `transform domainset: ${basename}`, - async (span) => { - const res = await processFile(span, sourcePath); - if (res === $skip) return; - - const id = basename; - const [title, incomingDescriptions, lines] = res; - - let finalDescriptions: string[]; - if (incomingDescriptions.length) { - finalDescriptions = SHARED_DESCRIPTION.slice(); - finalDescriptions.push(''); - appendArrayInPlace(finalDescriptions, incomingDescriptions); - } else { - finalDescriptions = SHARED_DESCRIPTION; - } - - return new DomainsetOutput(span, id) - .withTitle(title) - .withDescription(finalDescriptions) - .addFromDomainset(lines) - .write(); - } - ); -} - -/** - * Output Surge RULE-SET and Clash classical text format - */ -async function transformRuleset(parentSpan: Span, sourcePath: string, relativePath: string) { +async function transform(parentSpan: Span, sourcePath: string, relativePath: string) { const extname = path.extname(sourcePath); - const basename = path.basename(sourcePath, extname); + const id = path.basename(sourcePath, extname); return parentSpan - .traceChild(`transform ruleset: ${basename}`) + .traceChild(`transform ruleset: ${id}`) .traceAsyncFn(async (span) => { - const res = await processFile(span, sourcePath); - if (res === $skip) return; - - const id = basename; const type = relativePath.split(path.sep)[0]; - if (type !== 'ip' && type !== 'non_ip') { + if (type !== 'ip' && type !== 'non_ip' && type !== 'domainset') { throw new TypeError(`Invalid type: ${type}`); } + const res = await processFile(span, sourcePath); + if (res === $skip) return; + const [title, descriptions, lines, sgmodulePathname] = res; - let description: string[]; + let finalDescriptions: string[]; if (descriptions.length) { - description = SHARED_DESCRIPTION.slice(); - description.push(''); - appendArrayInPlace(description, descriptions); + finalDescriptions = SHARED_DESCRIPTION.slice(); + finalDescriptions.push(''); + appendArrayInPlace(finalDescriptions, descriptions); } else { - description = SHARED_DESCRIPTION; + finalDescriptions = SHARED_DESCRIPTION; + } + + if (type === 'domainset') { + return new DomainsetOutput(span, id) + .withTitle(title) + .withDescription(finalDescriptions) + .addFromDomainset(lines) + .write(); } return new RulesetOutput(span, id, type) .withTitle(title) - .withDescription(description) + .withDescription(finalDescriptions) .withMitmSgmodulePath(sgmodulePathname) .addFromRuleset(lines) .write(); diff --git a/Build/lib/fetch-assets.ts b/Build/lib/fetch-assets.ts index 633104806..bd12b5f60 100644 --- a/Build/lib/fetch-assets.ts +++ b/Build/lib/fetch-assets.ts @@ -11,23 +11,7 @@ export class CustomAbortError extends Error { public readonly digest = 'AbortError'; } -export class Custom304NotModifiedError extends Error { - public readonly name = 'Custom304NotModifiedError'; - public readonly digest = 'Custom304NotModifiedError'; - - constructor(public readonly url: string, public readonly data: string) { - super('304 Not Modified'); - } -} - -export class CustomNoETagFallbackError extends Error { - public readonly name = 'CustomNoETagFallbackError'; - public readonly digest = 'CustomNoETagFallbackError'; - - constructor(public readonly data: string) { - super('No ETag Fallback'); - } -} +const reusedCustomAbortError = new CustomAbortError(); export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[], processLine = false) { const controller = new AbortController(); @@ -39,12 +23,12 @@ export async function fetchAssets(url: string, fallbackUrls: null | undefined | await waitWithAbort(50 + (index + 1) * 100, controller.signal); } catch { console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url)); - throw new CustomAbortError(); + throw reusedCustomAbortError; } } if (controller.signal.aborted) { console.log(picocolors.gray('[fetch cancelled]'), picocolors.gray(url)); - throw new CustomAbortError(); + throw reusedCustomAbortError; } const res = await $$fetch(url, { signal: controller.signal, ...defaultRequestInit }); diff --git a/Build/lib/fetch-retry.ts b/Build/lib/fetch-retry.ts index 64d2c2e3e..c70ea10ef 100644 --- a/Build/lib/fetch-retry.ts +++ b/Build/lib/fetch-retry.ts @@ -34,11 +34,7 @@ setGlobalDispatcher(agent.compose( // TODO: this part of code is only for allow more errors to be retried by default // This should be removed once https://github.com/nodejs/undici/issues/3728 is implemented retry(err, { state, opts }, cb) { - const statusCode = 'statusCode' in err && typeof err.statusCode === 'number' ? err.statusCode : null; const errorCode = 'code' in err ? (err as NodeJS.ErrnoException).code : undefined; - const headers = ('headers' in err && typeof err.headers === 'object') ? err.headers : undefined; - - const { counter } = state; // Any code that is not a Undici's originated and allowed to retry if ( @@ -49,42 +45,44 @@ setGlobalDispatcher(agent.compose( return cb(err); } + const statusCode = 'statusCode' in err && typeof err.statusCode === 'number' ? err.statusCode : null; + + // bail out if the status code matches one of the following + if ( + statusCode != null + && ( + statusCode === 401 // Unauthorized, should check credentials instead of retrying + || statusCode === 403 // Forbidden, should check permissions instead of retrying + || statusCode === 404 // Not Found, should check URL instead of retrying + || statusCode === 405 // Method Not Allowed, should check method instead of retrying + ) + ) { + return cb(err); + } + // if (errorCode === 'UND_ERR_REQ_RETRY') { // return cb(err); // } - const { method, retryOptions = {} } = opts; - const { maxRetries = 5, minTimeout = 500, maxTimeout = 10 * 1000, timeoutFactor = 2, methods = ['GET', 'HEAD', 'OPTIONS', 'PUT', 'DELETE', 'TRACE'] - } = retryOptions; + } = opts.retryOptions || {}; // If we reached the max number of retries - if (counter > maxRetries) { + if (state.counter > maxRetries) { return cb(err); } // If a set of method are provided and the current method is not in the list - if (Array.isArray(methods) && !methods.includes(method)) { + if (Array.isArray(methods) && !methods.includes(opts.method)) { return cb(err); } - // bail out if the status code matches one of the following - if ( - statusCode != null - && ( - statusCode === 401 // Unauthorized, should check credentials instead of retrying - || statusCode === 403 // Forbidden, should check permissions instead of retrying - || statusCode === 404 // Not Found, should check URL instead of retrying - || statusCode === 405 // Method Not Allowed, should check method instead of retrying - ) - ) { - return cb(err); - } + const headers = ('headers' in err && typeof err.headers === 'object') ? err.headers : undefined; const retryAfterHeader = (headers as Record | null | undefined)?.['retry-after']; let retryAfter = -1; @@ -97,7 +95,7 @@ setGlobalDispatcher(agent.compose( const retryTimeout = retryAfter > 0 ? Math.min(retryAfter, maxTimeout) - : Math.min(minTimeout * (timeoutFactor ** (counter - 1)), maxTimeout); + : Math.min(minTimeout * (timeoutFactor ** (state.counter - 1)), maxTimeout); console.log('[fetch retry]', 'schedule retry', { statusCode, retryTimeout, errorCode, url: opts.origin }); // eslint-disable-next-line sukka/prefer-timer-id -- won't leak diff --git a/Build/lib/fetch-text-by-line.bench.ts b/Build/lib/fetch-text-by-line.bench.ts index 34d8c6cfc..02660c113 100644 --- a/Build/lib/fetch-text-by-line.bench.ts +++ b/Build/lib/fetch-text-by-line.bench.ts @@ -1,4 +1,4 @@ -import { readFileByLine, readFileByLineLegacy, readFileByLineNew } from './fetch-text-by-line'; +import { readFileByLine, readFileByLineNew } from './fetch-text-by-line'; import path from 'node:path'; import fsp from 'node:fs/promises'; import { OUTPUT_SURGE_DIR } from '../constants/dir'; @@ -10,7 +10,6 @@ const file = path.join(OUTPUT_SURGE_DIR, 'domainset/reject_extra.conf'); group(() => { bench('readFileByLine', () => Array.fromAsync(readFileByLine(file))); - bench('readFileByLineLegacy', () => Array.fromAsync(readFileByLineLegacy(file))); bench('readFileByLineNew', async () => Array.fromAsync(await readFileByLineNew(file))); bench('fsp.readFile', () => fsp.readFile(file, 'utf-8').then((content) => content.split('\n'))); }); diff --git a/Build/lib/fetch-text-by-line.ts b/Build/lib/fetch-text-by-line.ts index c40446501..025f0fa50 100644 --- a/Build/lib/fetch-text-by-line.ts +++ b/Build/lib/fetch-text-by-line.ts @@ -1,5 +1,4 @@ import fs from 'node:fs'; -import { Readable } from 'node:stream'; import fsp from 'node:fs/promises'; import type { FileHandle } from 'node:fs/promises'; import readline from 'node:readline'; @@ -11,19 +10,7 @@ import { processLine, ProcessLineStream } from './process-line'; import { $$fetch } from './fetch-retry'; import type { UndiciResponseData } from './fetch-retry'; import type { Response as UnidiciWebResponse } from 'undici'; - -function getReadableStream(file: string | FileHandle): ReadableStream { - if (typeof file === 'string') { - // return fs.openAsBlob(file).then(blob => blob.stream()) - return Readable.toWeb(fs.createReadStream(file/* , { encoding: 'utf-8' } */)); - } - return file.readableWebStream(); -} - -// TODO: use FileHandle.readLine() -export const readFileByLineLegacy: ((file: string /* | FileHandle */) => AsyncIterable) = (file: string | FileHandle) => getReadableStream(file) - .pipeThrough(new TextDecoderStream()) - .pipeThrough(new TextLineStream()); +import { invariant } from 'foxts/guard'; export function readFileByLine(file: string): AsyncIterable { return readline.createInterface({ @@ -37,26 +24,17 @@ export async function readFileByLineNew(file: string): Promise(resp: T): NonNullable { - if (resp.body == null) { - throw new Error('Failed to fetch remote text'); - } +export const createReadlineInterfaceFromResponse: ((resp: UndiciResponseData | UnidiciWebResponse, processLine?: boolean) => ReadableStream) = (resp, processLine = false) => { + invariant(resp.body, 'Failed to fetch remote text'); if ('bodyUsed' in resp && resp.bodyUsed) { throw new Error('Body has already been consumed.'); } - return resp.body; -} - -export const createReadlineInterfaceFromResponse: ((resp: UndiciResponseData | UnidiciWebResponse, processLine?: boolean) => ReadableStream) = (resp, processLine = false) => { - const stream = ensureResponseBody(resp); - - const webStream: ReadableStream = 'getReader' in stream - ? stream - : ( - 'text' in stream - ? stream.body as any - : Readable.toWeb(new Readable().wrap(stream)) - ); + let webStream: ReadableStream; + if ('pipeThrough' in resp.body) { + webStream = resp.body; + } else { + throw new TypeError('Invalid response body!'); + } const resultStream = webStream .pipeThrough(new TextDecoderStream()) diff --git a/Build/lib/parse-dnsmasq.ts b/Build/lib/parse-dnsmasq.ts index b1d493f70..959c74974 100644 --- a/Build/lib/parse-dnsmasq.ts +++ b/Build/lib/parse-dnsmasq.ts @@ -1,16 +1,8 @@ import { createReadlineInterfaceFromResponse } from './fetch-text-by-line'; -// https://github.com/remusao/tldts/issues/2121 -// In short, single label domain suffix is ignored due to the size optimization, so no isIcann -// import tldts from 'tldts-experimental'; -import tldts from 'tldts'; import type { UndiciResponseData } from './fetch-retry'; import type { Response } from 'undici'; - -function isDomainLoose(domain: string): boolean { - const r = tldts.parse(domain); - return !!(!r.isIp && (r.isIcann || r.isPrivate)); -} +import { fastNormalizeDomainIgnoreWww } from './normalize-domain'; export function extractDomainsFromFelixDnsmasq(line: string): string | null { if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) { @@ -24,7 +16,7 @@ export async function parseFelixDnsmasqFromResp(resp: UndiciResponseData | Respo for await (const line of createReadlineInterfaceFromResponse(resp, true)) { const domain = extractDomainsFromFelixDnsmasq(line); - if (domain && isDomainLoose(domain)) { + if (domain && fastNormalizeDomainIgnoreWww(domain)) { results.push(domain); } } diff --git a/package.json b/package.json index a3351a31f..6f492c840 100644 --- a/package.json +++ b/package.json @@ -59,7 +59,6 @@ "@types/fast-fifo": "^1.3.0", "@types/mocha": "^10.0.10", "@types/node": "^22.10.7", - "@types/node-fetch": "^2.6.12", "@types/tar-fs": "^2.0.4", "@types/tar-stream": "^3.1.3", "eslint": "^9.18.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ede50c554..843f2eeeb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -131,9 +131,6 @@ importers: '@types/node': specifier: ^22.10.7 version: 22.10.7 - '@types/node-fetch': - specifier: ^2.6.12 - version: 2.6.12 '@types/tar-fs': specifier: ^2.0.4 version: 2.0.4 @@ -548,9 +545,6 @@ packages: '@types/mocha@10.0.10': resolution: {integrity: sha512-xPyYSz1cMPnJQhl0CLMH68j3gprKZaTjG3s5Vi+fDgx+uhG9NOXwbVt52eFS8ECyXhyKcjDLCBEqBExKuiZb7Q==} - '@types/node-fetch@2.6.12': - resolution: {integrity: sha512-8nneRWKCg3rMtF69nLQJnOYUcbafYeFSjqkw3jCRLsqkWFlHaoQrr5mXmofFGOx3DKn7UfmBMyov8ySvLRVldA==} - '@types/node@22.10.7': resolution: {integrity: sha512-V09KvXxFiutGp6B7XkpaDXlNadZxrzajcY50EuoLIpQ6WWYCSvf19lVIazzfIzQvhUN2HjX12spLojTnhuKlGg==} @@ -697,9 +691,6 @@ packages: async-retry@1.3.3: resolution: {integrity: sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==} - asynckit@0.4.0: - resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} - b4a@1.6.7: resolution: {integrity: sha512-OnAYlL5b7LEkALw87fUVafQw5rVR9RjwGd4KUwNQ6DrrNmaVaUCgLipfVlzrPQ4tWOR9P0IXGNOx50jYCCdSJg==} @@ -806,10 +797,6 @@ packages: colorette@2.0.20: resolution: {integrity: sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==} - combined-stream@1.0.8: - resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} - engines: {node: '>= 0.8'} - comment-parser@1.4.1: resolution: {integrity: sha512-buhp5kePrmda3vhc5B9t7pUQXAb2Tnd0qgpkIhPhkHXxJpiPJ11H0ZEU0oBpJ2QztSbzG/ZxMj/CHsYJqRHmyg==} engines: {node: '>= 12.0.0'} @@ -859,10 +846,6 @@ packages: defu@6.1.4: resolution: {integrity: sha512-mEQCMmwJu317oSz8CwdIOdwf3xMif1ttiM8LTufzc3g6kR+9Pe236twL8j3IYT1F7GfRgGcW6MWxzZjLIkuHIg==} - delayed-stream@1.0.0: - resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} - engines: {node: '>=0.4.0'} - detect-libc@2.0.3: resolution: {integrity: sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==} engines: {node: '>=8'} @@ -1138,10 +1121,6 @@ packages: resolution: {integrity: sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==} engines: {node: '>=14'} - form-data@4.0.1: - resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==} - engines: {node: '>= 6'} - foxts@1.1.7: resolution: {integrity: sha512-Pw7S1yI20GY8gfj6RXt9usRE5TdQ/lgAqpy2EaWKUVNARC+jW0hxx/MQH8xkNlT3NSpt0X1P99CJTEvh3kVdUQ==} @@ -1349,14 +1328,6 @@ packages: resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} engines: {node: '>=8.6'} - mime-db@1.52.0: - resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} - engines: {node: '>= 0.6'} - - mime-types@2.1.35: - resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} - engines: {node: '>= 0.6'} - mimic-response@3.1.0: resolution: {integrity: sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==} engines: {node: '>=10'} @@ -2239,11 +2210,6 @@ snapshots: '@types/mocha@10.0.10': {} - '@types/node-fetch@2.6.12': - dependencies: - '@types/node': 22.10.7 - form-data: 4.0.1 - '@types/node@22.10.7': dependencies: undici-types: 6.20.0 @@ -2421,8 +2387,6 @@ snapshots: dependencies: retry: 0.13.1 - asynckit@0.4.0: {} - b4a@1.6.7: {} balanced-match@1.0.2: {} @@ -2549,10 +2513,6 @@ snapshots: colorette@2.0.20: {} - combined-stream@1.0.8: - dependencies: - delayed-stream: 1.0.0 - comment-parser@1.4.1: {} concat-map@0.0.1: {} @@ -2587,8 +2547,6 @@ snapshots: defu@6.1.4: {} - delayed-stream@1.0.0: {} - detect-libc@2.0.3: {} diff-sequences@29.6.3: {} @@ -2936,12 +2894,6 @@ snapshots: cross-spawn: 7.0.6 signal-exit: 4.1.0 - form-data@4.0.1: - dependencies: - asynckit: 0.4.0 - combined-stream: 1.0.8 - mime-types: 2.1.35 - foxts@1.1.7: {} fs-constants@1.0.0: {} @@ -3136,12 +3088,6 @@ snapshots: braces: 3.0.3 picomatch: 2.3.1 - mime-db@1.52.0: {} - - mime-types@2.1.35: - dependencies: - mime-db: 1.52.0 - mimic-response@3.1.0: {} minimatch@3.1.2: