Skip to content

Commit

Permalink
Merge branch 'SukkaW:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
FYLSen authored Jan 13, 2025
2 parents 59caf2b + b0a7a0b commit df4d0a5
Show file tree
Hide file tree
Showing 50 changed files with 961 additions and 1,293 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/check-source-domain.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,37 @@ jobs:
with:
node-version-file: ".node-version"
cache: "pnpm"
- name: Get current date
  id: date
  run: |
    # Read the clock exactly once and derive every output from that single
    # reading. Calling `date` separately for each field lets the fields
    # disagree when the step straddles a second, minute or day boundary,
    # which would produce a cache key that matches no real instant.
    # `set --` splits the six space-separated fields into $1..$6
    # (year, month, day, hour, minute, second).
    set -- $(date +'%Y %m %d %H %M %S')
    {
      echo "date=$1-$2-$3 $4:$5:$6"
      echo "year=$1"
      echo "month=$2"
      echo "day=$3"
      echo "hour=$4"
      echo "minute=$5"
      echo "second=$6"
    } >> "$GITHUB_OUTPUT"
- name: Restore cache.db
# Restores the `.cache` directory from a previously saved cache entry, if one matches.
uses: actions/cache/restore@v4
id: cache-db-restore
with:
path: |
.cache
key: ${{ runner.os }}-v3-${{ steps.date.outputs.year }}-${{ steps.date.outputs.month }}-${{ steps.date.outputs.day }} ${{ steps.date.outputs.hour }}:${{ steps.date.outputs.minute }}:${{ steps.date.outputs.second }}
# The primary key embeds the current timestamp down to the second, so restoration
# relies on the fallbacks below: progressively shorter prefixes of that key
# (same minute, same hour, same day, same month, same year, then any v3 cache for this OS).
restore-keys: |
${{ runner.os }}-v3-${{ steps.date.outputs.year }}-${{ steps.date.outputs.month }}-${{ steps.date.outputs.day }} ${{ steps.date.outputs.hour }}:${{ steps.date.outputs.minute }}:
${{ runner.os }}-v3-${{ steps.date.outputs.year }}-${{ steps.date.outputs.month }}-${{ steps.date.outputs.day }} ${{ steps.date.outputs.hour }}:
${{ runner.os }}-v3-${{ steps.date.outputs.year }}-${{ steps.date.outputs.month }}-${{ steps.date.outputs.day }}
${{ runner.os }}-v3-${{ steps.date.outputs.year }}-${{ steps.date.outputs.month }}-
${{ runner.os }}-v3-${{ steps.date.outputs.year }}-
${{ runner.os }}-v3-
- run: pnpm install
- run: pnpm run node Build/validate-domain-alive.ts
- name: Cache cache.db
# `if: always()` makes this step run even when an earlier step failed, so `.cache`
# is saved regardless of the validation result. The key is built from the same
# `steps.date` outputs as the primary key of the restore step.
if: always()
uses: actions/cache/save@v4
with:
path: |
.cache
key: ${{ runner.os }}-v3-${{ steps.date.outputs.year }}-${{ steps.date.outputs.month }}-${{ steps.date.outputs.day }} ${{ steps.date.outputs.hour }}:${{ steps.date.outputs.minute }}:${{ steps.date.outputs.second }}
17 changes: 17 additions & 0 deletions Build/_get-lum-apex-domains.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
import tldts from 'tldts';

/**
 * Stand-alone script: downloads the remote `luminati.txt` list, reduces every
 * entry to its apex (registrable) domain via `tldts.getDomain`, de-duplicates
 * the results and prints them to stdout, one per line, each prefixed with `.`.
 */
(async () => {
  const remoteLines = await fetchRemoteTextByLine(
    'https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt',
    true
  );

  // A Set keeps each apex domain once, in first-seen order.
  const apexDomains = new Set<string>();

  for (const entry of await Array.fromAsync(remoteLines)) {
    // The first 8 characters of each line are dropped before parsing.
    // NOTE(review): presumably a fixed-width hosts-style prefix such as
    // `0.0.0.0 ` — confirm against the upstream list format.
    const apex = tldts.getDomain(entry.slice(8));
    if (apex) {
      apexDomains.add(apex);
    }
  }

  const output = [...apexDomains].map(domain => `.${domain}`).join('\n');
  console.log(output);
})();
4 changes: 2 additions & 2 deletions Build/build-apple-cdn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { createMemoizedPromise } from './lib/memo-promise';
import { DomainsetOutput } from './lib/create-file';
import { $fetch } from './lib/make-fetch-happen';
import { $$fetch } from './lib/fetch-retry';

export const getAppleCdnDomainsPromise = createMemoizedPromise(() => $fetch('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf').then(parseFelixDnsmasqFromResp));
export const getAppleCdnDomainsPromise = createMemoizedPromise(() => $$fetch('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf').then(parseFelixDnsmasqFromResp));

export const buildAppleCdn = task(require.main === module, __filename)(async (span) => {
const res: string[] = await span.traceChildPromise('get apple cdn domains', getAppleCdnDomainsPromise());
Expand Down
2 changes: 1 addition & 1 deletion Build/build-chn-cidr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const PROBE_CHN_CIDR_V4 = [
'120.78.92.171'
];

export const getChnCidrPromise = createMemoizedPromise(cachedOnlyFail(
export const getChnCidrPromise = createMemoizedPromise(cachedOnlyFail<[], [string[], string[]]>(
async function getChnCidr() {
const [_cidr4, cidr6] = await Promise.all([
fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt', true).then(Array.fromAsync<string>),
Expand Down
62 changes: 46 additions & 16 deletions Build/build-internal-reverse-chn-cidr.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,61 @@
import path from 'node:path';
import { task } from './trace';

import { exclude, merge } from 'fast-cidr-tools';
import { getChnCidrPromise } from './build-chn-cidr';
import { NON_CN_CIDR_INCLUDED_IN_CHNROUTE, RESERVED_IPV4_CIDR } from './constants/cidr';
// import { RESERVED_IPV4_CIDR, NON_CN_CIDR_INCLUDED_IN_CHNROUTE } from './constants/cidr';

import fs from 'node:fs';
import { OUTPUT_INTERNAL_DIR } from './constants/dir';
import { asyncWriteToStream } from 'foxts/async-write-to-stream';
import { mkdirp } from './lib/misc';
import { appendArrayInPlace } from './lib/append-array-in-place';
// import { appendArrayInPlace } from './lib/append-array-in-place';
import Worktank from 'worktank';

export const buildInternalReverseChnCIDR = task(require.main === module, __filename)(async () => {
const [cidr] = await getChnCidrPromise();
const pool = new Worktank({
name: 'build-internal-reverse-chn-cidr',
size: 1,
timeout: 10000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
warmup: true,
autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},
methods: { // An object mapping function names to functions objects to serialize and deserialize into each worker thread, only functions that don't depend on their closure can be serialized
// eslint-disable-next-line object-shorthand -- workertank
getreversedCidr: async function (cidr: string[], importMetaUrl: string): Promise<string[]> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
const { exclude, merge } = __require('fast-cidr-tools');
const { RESERVED_IPV4_CIDR, NON_CN_CIDR_INCLUDED_IN_CHNROUTE } = __require('./constants/cidr');
const { appendArrayInPlace } = __require('./lib/append-array-in-place');

const reversedCidr = merge(
appendArrayInPlace(
exclude(
['0.0.0.0/0'],
RESERVED_IPV4_CIDR.concat(cidr),
return merge(
appendArrayInPlace(
exclude(
['0.0.0.0/0'],
RESERVED_IPV4_CIDR.concat(cidr),
true
),
// https://github.com/misakaio/chnroutes2/issues/25
NON_CN_CIDR_INCLUDED_IN_CHNROUTE
),
true
),
// https://github.com/misakaio/chnroutes2/issues/25
NON_CN_CIDR_INCLUDED_IN_CHNROUTE
),
true
);
);
}
}
});

export const buildInternalReverseChnCIDR = task(require.main === module, __filename)(async (span) => {
const [cidr] = await span.traceChildPromise('download chnroutes2', getChnCidrPromise());

const reversedCidr = await span.traceChildAsync('build reversed chn cidr', async () => {
const reversedCidr = await pool.exec(
'getreversedCidr',
[cidr, import.meta.url]
);
pool.terminate();

return reversedCidr;
});

const outputFile = path.join(OUTPUT_INTERNAL_DIR, 'reversed-chn-cidr.txt');
await mkdirp(OUTPUT_INTERNAL_DIR);
Expand Down
101 changes: 46 additions & 55 deletions Build/build-reject-domainset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import path from 'node:path';
import process from 'node:process';

import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
import { processHosts } from './lib/parse-filter/hosts';
import { processDomainLists } from './lib/parse-filter/domainlists';
import { processFilterRules } from './lib/parse-filter/filters';

import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_WHITELIST } from './constants/reject-data-source';
import { compareAndWriteFile } from './lib/create-file';
Expand All @@ -18,10 +20,13 @@ import { addArrayElementsToSet } from 'foxts/add-array-elements-to-set';
import { appendArrayInPlace } from './lib/append-array-in-place';
import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
import { DomainsetOutput } from './lib/create-file';
import { foundDebugDomain } from './lib/parse-filter/shared';

const readLocalRejectDomainsetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf'));
const readLocalRejectExtraDomainsetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka_extra.conf'));
const readLocalRejectRulesetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/reject.conf'));
const readLocalRejectDropRulesetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/reject-drop.conf'));
const readLocalRejectNoDropRulesetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/reject-no-drop.conf'));
const readLocalMyRejectRulesetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/my_reject.conf'));

export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
Expand Down Expand Up @@ -61,65 +66,49 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);

// Parse from AdGuard Filters
const shouldStop = await span
await span
.traceChild('download and process hosts / adblock filter rules')
.traceAsyncFn(async (childSpan) => {
// eslint-disable-next-line sukka/no-single-return -- not single return
let shouldStop = false;
await Promise.all([
// Parse from remote hosts & domain lists
HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectOutput)),
HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),

DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectOutput)),
DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),

ADGUARD_FILTERS.map(
entry => processFilterRules(childSpan, ...entry)
.then(({ white, black, foundDebugDomain }) => {
if (foundDebugDomain) {
// eslint-disable-next-line sukka/no-single-return -- not single return
shouldStop = true;
// we should not break here, as we want to see full matches from all data source
}
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectOutput(black);
})
),
ADGUARD_FILTERS_EXTRA.map(
entry => processFilterRules(childSpan, ...entry)
.then(({ white, black, foundDebugDomain }) => {
if (foundDebugDomain) {
// eslint-disable-next-line sukka/no-single-return -- not single return
shouldStop = true;
// we should not break here, as we want to see full matches from all data source
}
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectExtraOutput(black);
})
),
ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
})),
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
readLocalRejectDomainsetPromise.then(appendArrayToRejectOutput),
readLocalRejectDomainsetPromise.then(appendArrayToRejectExtraOutput),
readLocalRejectExtraDomainsetPromise.then(appendArrayToRejectExtraOutput),
// Dedupe domainSets
// span.traceChildAsync('collect black keywords/suffixes', async () =>
/**
.traceAsyncFn((childSpan) => Promise.all([
// Parse from remote hosts & domain lists
HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectOutput)),
HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),

DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectOutput)),
DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),

ADGUARD_FILTERS.map(
entry => processFilterRules(childSpan, ...entry)
.then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectOutput(black);
})
),
ADGUARD_FILTERS_EXTRA.map(
entry => processFilterRules(childSpan, ...entry)
.then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectExtraOutput(black);
})
),
ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
})),
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
readLocalRejectDomainsetPromise.then(appendArrayToRejectOutput),
readLocalRejectDomainsetPromise.then(appendArrayToRejectExtraOutput),
readLocalRejectExtraDomainsetPromise.then(appendArrayToRejectExtraOutput),
// Dedupe domainSets
// span.traceChildAsync('collect black keywords/suffixes', async () =>
/**
* Collect DOMAIN, DOMAIN-SUFFIX, and DOMAIN-KEYWORD from non_ip/reject.conf for deduplication
* DOMAIN-WILDCARD is not really useful for deduplication, it is only included in AdGuardHome output
*/
rejectOutput.addFromRuleset(readLocalRejectRulesetPromise),
rejectExtraOutput.addFromRuleset(readLocalRejectRulesetPromise)
].flat());
// eslint-disable-next-line sukka/no-single-return -- not single return
return shouldStop;
});
rejectOutput.addFromRuleset(readLocalRejectRulesetPromise),
rejectExtraOutput.addFromRuleset(readLocalRejectRulesetPromise)
].flat()));

if (shouldStop) {
if (foundDebugDomain.value) {
process.exit(1);
}

Expand Down Expand Up @@ -178,6 +167,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
await new DomainsetOutput(span, 'my_reject')
.addFromRuleset(readLocalMyRejectRulesetPromise)
.addFromRuleset(readLocalRejectRulesetPromise)
.addFromRuleset(readLocalRejectDropRulesetPromise)
.addFromRuleset(readLocalRejectNoDropRulesetPromise)
.done()
).adguardhome()
)
Expand Down
37 changes: 15 additions & 22 deletions Build/build-reject-ip-list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
import { fsFetchCache, getFileContentHash } from './lib/cache-filesystem';
import { processLine } from './lib/process-line';
import { RulesetOutput } from './lib/create-file';
import { SOURCE_DIR } from './constants/dir';
import { $fetch } from './lib/make-fetch-happen';
import { $$fetch } from './lib/fetch-retry';
import { fetchAssetsWithout304 } from './lib/fetch-assets';

const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
const getBogusNxDomainIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = $fetch(BOGUS_NXDOMAIN_URL).then(async (resp) => {
const getBogusNxDomainIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = $$fetch(BOGUS_NXDOMAIN_URL).then(async (resp) => {
const ipv4: string[] = [];
const ipv6: string[] = [];

Expand All @@ -37,26 +37,19 @@ const BOTNET_FILTER_MIRROR_URL = [
// https://curbengh.github.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt
];

const getBotNetFilterIPsPromise = fsFetchCache.applyWithHttp304AndMirrors<[ipv4: string[], ipv6: string[]]>(
BOTNET_FILTER_URL,
BOTNET_FILTER_MIRROR_URL,
getFileContentHash(__filename),
(text) => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
const ip = processLine(cur);
if (ip) {
if (isProbablyIpv4(ip)) {
acc[0].push(ip);
} else if (isProbablyIpv6(ip)) {
acc[1].push(ip);
}
const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssetsWithout304(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL).then(text => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
const ip = processLine(cur);
if (ip) {
if (isProbablyIpv4(ip)) {
acc[0].push(ip);
} else if (isProbablyIpv6(ip)) {
acc[1].push(ip);
}
return acc;
}, [[], []]),
{
serializer: JSON.stringify,
deserializer: JSON.parse
}
);
return acc;
}, [[], []]));

const readLocalRejectIpListPromise = readFileIntoProcessedArray(path.resolve(SOURCE_DIR, 'ip/reject.conf'));

export const buildRejectIPList = task(require.main === module, __filename)(async (span) => {
const [bogusNxDomainIPs, botNetIPs] = await Promise.all([
Expand All @@ -75,7 +68,7 @@ export const buildRejectIPList = task(require.main === module, __filename)(async
' - https://github.com/felixonmars/dnsmasq-china-list',
' - https://github.com/curbengh/botnet-filter'
])
.addFromRuleset(await readFileIntoProcessedArray(path.resolve(SOURCE_DIR, 'ip/reject.conf')))
.addFromRuleset(readLocalRejectIpListPromise)
.bulkAddCIDR4NoResolve(bogusNxDomainIPs[0])
.bulkAddCIDR6NoResolve(bogusNxDomainIPs[1])
.bulkAddCIDR4NoResolve(botNetIPs[0])
Expand Down
Loading

0 comments on commit df4d0a5

Please sign in to comment.