Skip to content

Commit

Permalink
Improve Phishing score algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed Sep 2, 2024
1 parent 3ebb007 commit c96b35b
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 52 deletions.
5 changes: 5 additions & 0 deletions Build/constants/loose-tldts-opt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,8 @@ export const looseTldtsOpt: Parameters<typeof tldts.getSubdomain>[1] = {
detectIp: false,
mixedInputs: false
};

/**
 * Parse options that spread every setting from `looseTldtsOpt` and additionally
 * set `allowPrivateDomains: true`.
 *
 * NOTE(review): the identifier is spelled `loosTldOpt...`, unlike `looseTldtsOpt`;
 * renaming it would require updating every importer as well.
 */
export const loosTldOptWithPrivateDomains: Parameters<typeof tldts.getSubdomain>[1] = {
...looseTldtsOpt,
allowPrivateDomains: true
};
5 changes: 4 additions & 1 deletion Build/lib/get-phishing-domains.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import { calcDomainAbuseScore } from './get-phishing-domains';

// Manual inspection harness for `calcDomainAbuseScore`: every call below only
// prints the computed score; nothing is asserted, so this suite cannot fail on
// a scoring regression.
// NOTE(review): the suite is titled 'sortDomains' although it only exercises
// `calcDomainAbuseScore` - confirm whether the title is intentional.
describe('sortDomains', () => {
it('nmdj.pl', () => {
// NOTE(review): this call passes a third argument while the calls below pass
// two - confirm against the current `calcDomainAbuseScore` signature.
console.log(calcDomainAbuseScore('.01462ccca801fed55370d79231c876e5.nmdj.pl', '.01462ccca801fed55370d79231c876e5', false));
console.log(calcDomainAbuseScore('zrz.q435.one', 'zrz'));
console.log(calcDomainAbuseScore('z1.finprotect.click', 'z1'));
console.log(calcDomainAbuseScore('accountsettingaddrecoverymanagesiteupdatebillingreview.village.softcare.co.in', 'accountsettingaddrecoverymanagesiteupdatebillingreview.village'));
console.log(calcDomainAbuseScore('allegrolokalnie.pl-oferta51328742.pl', 'allegrolokalnie'));
});
});
115 changes: 64 additions & 51 deletions Build/lib/get-phishing-domains.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as tldts from 'tldts-experimental';
import type { Span } from '../trace';
import { appendArrayInPlaceCurried } from './append-array-in-place';
import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
import picocolors from 'picocolors';
import createKeywordFilter from './aho-corasick';
import { createCacheKey } from './cache-filesystem';
Expand Down Expand Up @@ -78,6 +78,7 @@ const BLACK_TLD = new Set([
'space',
'store',
'stream',
'surf',
'tech',
'tk',
'tokyo',
Expand All @@ -96,13 +97,13 @@ const BLACK_TLD = new Set([

// Apex domains that are never appended to the phishing result: the final loop
// checks `WHITELIST_MAIN_DOMAINS.has(domain)` before pushing `.${domain}`, so a
// listed domain is skipped regardless of its accumulated count/score.
const WHITELIST_MAIN_DOMAINS = new Set([
'w3s.link', // ipfs gateway
'dweb.link', // ipfs gateway
'nftstorage.link', // ipfs gateway
// 'dweb.link', // ipfs gateway
// 'nftstorage.link', // ipfs gateway
'fleek.cool', // ipfs gateway
'business.site', // Drag'n'Drop site building platform
'page.link', // Firebase URL Shortener
'notion.site',
'vercel.app',
// 'notion.site',
// 'vercel.app',
'gitbook.io'
]);

Expand All @@ -121,14 +122,18 @@ const sensitiveKeywords = createKeywordFilter([
'virus-',
'icloud-',
'apple-',
'www.apple.',
'-coinbase',
'coinbase-'
'coinbase-',
'lcloud.',
'lcloud-'
]);
// Lower-weight keyword matcher consulted by `calcDomainAbuseScore`: a hit adds a
// small amount on its own and a larger bonus when `sensitiveKeywords` also
// matches.
const lowKeywords = createKeywordFilter([
'-co-jp',
'customer.',
'customer-',
'.www-'
'.www-',
'instagram'
]);

const cacheKey = createCacheKey(__filename);
Expand All @@ -144,6 +149,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
});

const domainCountMap: Record<string, number> = {};
const domainScoreMap: Record<string, number> = {};

span.traceChildSync('process phishing domain set', () => {
for (let i = 0, len = domainArr.length; i < len; i++) {
Expand All @@ -152,8 +158,13 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
const {
publicSuffix: tld,
domain: apexDomain,
subdomain
} = tldts.parse(line, looseTldtsOpt);
subdomain,
isPrivate
} = tldts.parse(line, loosTldOptWithPrivateDomains);

if (isPrivate) {
continue;
}

if (!tld) {
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
Expand All @@ -164,67 +175,69 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
continue;
}

let sensitiveKeywordsHit: boolean | null = null;
if (tld.length < 6 && !tld.includes('.') && !BLACK_TLD.has(tld) && !(sensitiveKeywordsHit = sensitiveKeywords(line))) continue;

domainCountMap[apexDomain] ||= 0;
domainCountMap[apexDomain] += calcDomainAbuseScore(line, subdomain, sensitiveKeywordsHit);
domainCountMap[apexDomain] += 1;

if (!('apexDomain' in domainScoreMap)) {
domainScoreMap[apexDomain] = 0;
if (BLACK_TLD.has(tld)) {
domainScoreMap[apexDomain] += 4;
} else if (tld.length > 6) {
domainScoreMap[apexDomain] += 2;
}
}
domainScoreMap[apexDomain] += calcDomainAbuseScore(line, subdomain);
}
});

for (const domain in domainCountMap) {
if (domainCountMap[domain] >= 10 && !WHITELIST_MAIN_DOMAINS.has(domain)) {
for (const domain in domainScoreMap) {
if (
!WHITELIST_MAIN_DOMAINS.has(domain)
&& (
domainScoreMap[domain] >= 12
|| (domainScoreMap[domain] > 6 && domainCountMap[domain] >= 3)
)
) {
console.log({ domain });
domainArr.push(`.${domain}`);
}
}

return domainArr;
});

export function calcDomainAbuseScore(line: string, subdomain: string | null, sensitiveKeywordsHit: boolean | null) {
let weight = 1;
export function calcDomainAbuseScore(line: string, subdomain: string | null) {
let weight = 0;

const hitLowKeywords = lowKeywords(line);
if (subdomain) {
const hitLowKeywords = lowKeywords(subdomain);
const sensitiveKeywordsHit = sensitiveKeywords(subdomain);

sensitiveKeywordsHit ??= sensitiveKeywords(line);
if (sensitiveKeywordsHit) {
weight += 4;
if (hitLowKeywords) {
weight += 5;
if (sensitiveKeywordsHit) {
weight += 8;
if (hitLowKeywords) {
weight += 4;
}
} else if (hitLowKeywords) {
weight += 1;
}
} else if (hitLowKeywords) {
weight += 0.5;
}

const lineLen = line.length;

if (lineLen > 19) {
// Add more weight if the domain is long enough
if (lineLen > 44) {
weight += 3.5;
} else if (lineLen > 34) {
weight += 2.5;
} else if (lineLen > 29) {
weight += 1.5;
} else if (lineLen > 24) {
weight += 0.75;
} else {
weight += 0.25;
}
}
const subdomainLength = subdomain.length;

if (subdomain) {
if (subdomain.length > 40) {
weight += 3;
} else if (subdomain.length > 30) {
weight += 1.5;
} else if (subdomain.length > 20) {
if (subdomainLength > 40) {
weight += 8;
} else if (subdomainLength > 30) {
weight += 4;
} else if (subdomainLength > 20) {
weight += 2;
} else if (subdomainLength > 10) {
weight += 1;
} else if (subdomain.length > 10) {
weight += 0.1;
}
if (subdomain.slice(1).includes('.')) {
weight += 1;
weight += 2;
if (subdomain.includes('www.')) {
weight += 2;
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions Source/domainset/reject_sukka.conf
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ optimus-ads.amap.com
.sitebeat.crazydomains.com
# online d'n'd website builder (https://www.infonline.de/)
.apps.iocnt.de
# codeanywhere is a haven for abuse
.codeanyapp.com

# >> Qihoo 360
hot.m.shouji.360tpcdn.com
Expand Down

0 comments on commit c96b35b

Please sign in to comment.