Skip to content

Commit

Permalink
refactor: scrapers
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevin Wang committed Dec 29, 2023
1 parent 1e6754c commit f885817
Show file tree
Hide file tree
Showing 71 changed files with 491,279 additions and 476,836 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"homepage": "https://github.com/chunkai1312/node-twstock#readme",
"dependencies": {
"axios": "^1.5.1",
"axios-rate-limit": "^1.3.0",
"cheerio": "^1.0.0-rc.12",
"csvtojson": "^2.0.10",
"iconv-lite": "^0.6.3",
Expand Down
1 change: 1 addition & 0 deletions src/enums/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ export * from './index.enum';
export * from './industry.enum';
export * from './market.enum';
export * from './futopt.enum';
export * from './scraper.enum';
9 changes: 9 additions & 0 deletions src/enums/scraper.enum.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/**
 * Identifiers for the available scraper implementations.
 *
 * Each member is a string enum value; the value is what gets used as the
 * lookup key when a scraper is selected by type.
 */
export enum Scraper {
/** Selects the `TwseScraper` implementation. */
Twse = 'twse',
/** Selects the `TpexScraper` implementation. */
Tpex = 'tpex',
/** Selects the `TaifexScraper` implementation. */
Taifex = 'taifex',
/** Selects the `TdccScraper` implementation. */
Tdcc = 'tdcc',
/** Selects the `MisScraper` implementation. */
Mis = 'mis',
/** Selects the `MopsScraper` implementation. */
Mops = 'mops',
/** Selects the `IsinScraper` implementation. */
Isin = 'isin',
}
1 change: 1 addition & 0 deletions src/interfaces/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
export * from './ticker.interface';
export * from './rate-limit-options.interface';
4 changes: 4 additions & 0 deletions src/interfaces/rate-limit-options.interface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/**
 * Rate-limit settings accepted by scrapers.
 *
 * The base scraper forwards these to `axios-rate-limit`: `limit` becomes
 * `maxRequests` and `ttl` becomes `perMilliseconds`. When no options are
 * supplied, the base scraper falls back to 3 requests per 5000 ms.
 */
export interface RateLimitOptions {
/** Length of the rate-limit window, in milliseconds. */
ttl: number;
/** Maximum number of requests allowed within one window. */
limit: number;
}
24 changes: 9 additions & 15 deletions src/scrapers/index.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
import { IsinScraper as IsinScraperStatic } from './isin-scraper';
import { TwseScraper as TwseScraperStatic } from './twse-scraper';
import { TpexScraper as TpexScraperStatic } from './tpex-scraper';
import { MisScraper as MisScraperStatic } from './mis-scraper';
import { TdccScraper as TdccScraperStatic } from './tdcc-scraper';
import { MopsScraper as MopsScraperStatic } from './mops-scraper';
import { TaifexScraper as TaifexScraperStatic } from './taifex-scraper';

export const IsinScraper = new IsinScraperStatic();
export const TwseScraper = new TwseScraperStatic();
export const TpexScraper = new TpexScraperStatic();
export const MisScraper = new MisScraperStatic();
export const TdccScraper = new TdccScraperStatic();
export const MopsScraper = new MopsScraperStatic();
export const TaifexScraper = new TaifexScraperStatic();
export * from './scraper';
export * from './twse-scraper';
export * from './tpex-scraper';
export * from './taifex-scraper';
export * from './tdcc-scraper';
export * from './mis-scraper';
export * from './mops-scraper';
export * from './isin-scraper';
export * from './scraper-factory';
16 changes: 10 additions & 6 deletions src/scrapers/isin-scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,19 @@ export class IsinScraper extends Scraper {
const page = iconv.decode(response.data, 'big5');
const $ = cheerio.load(page);

return $('.h4 tr').slice(1).map((_, el) => {
const data = $('.h4 tr').slice(1).map((_, el) => {
const td = $(el).find('td');
return {
symbol: td.eq(2).text().trim(),
name: td.eq(3).text().trim(),
exchange: asExchange(td.eq(4).text().trim()),
market: asMarket(td.eq(4).text().trim()),
industry: asIndustry(td.eq(6).text().trim()),
listedDate: DateTime.fromFormat(td.eq(7).text().trim(), 'yyyy/MM/dd').toISODate() as string,
};
listedDate: DateTime.fromFormat(td.eq(7).text().trim(), 'yyyy/MM/dd').toISODate(),
} as Record<string, any>;
}).toArray();

return data;
}

async fetchListedStocks(options: { market: 'TSE' | 'OTC' }) {
Expand All @@ -36,16 +38,18 @@ export class IsinScraper extends Scraper {
const page = iconv.decode(response.data, 'big5');
const $ = cheerio.load(page);

return $('.h4 tr').slice(1).map((_, el) => {
const data = $('.h4 tr').slice(1).map((_, el) => {
const td = $(el).find('td');
return {
symbol: td.eq(2).text().trim(),
name: td.eq(3).text().trim(),
exchange: asExchange(td.eq(4).text().trim()),
market: asMarket(td.eq(4).text().trim()),
industry: asIndustry(td.eq(6).text().trim()),
listedDate: DateTime.fromFormat(td.eq(7).text().trim(), 'yyyy/MM/dd').toISODate() as string,
};
listedDate: DateTime.fromFormat(td.eq(7).text().trim(), 'yyyy/MM/dd').toISODate(),
} as Record<string, any>;
}).toArray();

return data;
}
}
16 changes: 11 additions & 5 deletions src/scrapers/mis-scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ export class MisScraper extends Scraper {
const json = (response.data.rtmessage === 'OK') && response.data;
if (!json) return null;

return json.msgArray.map((row: any) => ({
const data = json.msgArray.map((row: any) => ({
symbol: asIndex(row.n) ?? (row.ch).replace('.tw', ''),
exchange: asExchange(row.ex.toUpperCase() as Market),
market: row.ex.toUpperCase(),
name: row.n,
ex_ch: `${row.ex}_${row.ch}`,
}));

return data;
}

async fetchStocksQuote(options: { ticker: Ticker, odd?: boolean }) {
Expand All @@ -42,7 +44,7 @@ export class MisScraper extends Scraper {
const json = (response.data.rtmessage === 'OK') && response.data;
if (!json) return null;

return json.msgArray.map((row: any) => ({
const data = json.msgArray.map((row: any) => ({
date: DateTime.fromFormat(row.d, 'yyyyMMdd').toISODate(),
symbol: row.c,
name: row.n,
Expand All @@ -60,7 +62,9 @@ export class MisScraper extends Scraper {
bidSize: row.g && row.g.split('_').slice(0, -1).map((size: string) => numeral(size).value()),
askSize: row.f && row.f.split('_').slice(0, -1).map((size: string) => numeral(size).value()),
lastUpdated: row.tlong && numeral(row.tlong).value(),
}));
})) as Record<string, any>[];

return data.find(row => row.symbol === ticker.symbol);
}

async fetchIndicesQuote(options: { ticker: Ticker }) {
Expand All @@ -74,7 +78,7 @@ export class MisScraper extends Scraper {
const json = (response.data.rtmessage === 'OK') && response.data;
if (!json) return null;

return json.msgArray.map((row: any) => ({
const data = json.msgArray.map((row: any) => ({
date: DateTime.fromFormat(row.d, 'yyyyMMdd').toISODate(),
symbol: ticker.symbol,
name: row.n,
Expand All @@ -85,7 +89,9 @@ export class MisScraper extends Scraper {
close: row.z && numeral(row.z).value(),
volume: row.v && numeral(row.v).value(),
lastUpdated: row.tlong && numeral(row.tlong).value(),
}));
})) as Record<string, any>[];

return data.find(row => row.symbol === ticker.symbol);
}

private extractExChFromTicker(ticker: Ticker) {
Expand Down
26 changes: 16 additions & 10 deletions src/scrapers/mops-scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import * as numeral from 'numeral';
import { Scraper } from './scraper';

export class MopsScraper extends Scraper {
async fetchStocksEps(options: { market: 'TSE' | 'OTC', year: number, quarter: number }) {
const { market, year, quarter } = options;
const type = { 'TSE': 'sii', 'OTC': 'otc' };
async fetchStocksEps(options: { market: string, year: number, quarter: number, symbol?: string }) {
const { market, year, quarter, symbol } = options;
const type: Record<string, string> = { 'TSE': 'sii', 'OTC': 'otc' };
const form = new URLSearchParams({
encodeURIComponent: '1',
step: '1',
Expand All @@ -19,7 +19,10 @@ export class MopsScraper extends Scraper {
season: numeral(quarter).format('00'),
});
const url = 'https://mops.twse.com.tw/mops/web/t163sb04';

const response = await this.httpService.post(url, form);
if (response.data.includes('查詢無資料!')) return null;

const $ = cheerio.load(response.data);

const data = $('.even,.odd').map((_, el) => {
Expand All @@ -28,18 +31,21 @@ export class MopsScraper extends Scraper {
const name = td.eq(1).text().trim();
const eps = numeral(td.eq(td.length - 1).text().trim()).value();
return { symbol, name, eps, year, quarter };
}).toArray();
}).toArray() as Record<string, any>[];

return _.sortBy(data, 'symbol');
return symbol ? data.find(data => data.symbol === symbol) : _.sortBy(data, 'symbol');
}

async fetchStocksRevenue(options: { market: 'TSE' | 'OTC', year: number, month: number, foreign?: boolean }) {
const { market, year, month, foreign = false } = options;
const type = { 'TSE': 'sii', 'OTC': 'otc' };
async fetchStocksRevenue(options: { market: string, year: number, month: number, foreign?: boolean, symbol?: string }) {
const { market, year, month, foreign = false, symbol } = options;
const type: Record<string, string> = { 'TSE': 'sii', 'OTC': 'otc' };
const suffix = `${numeral(year).subtract(1911).value()}_${month}_${+foreign}`;
const url = `https://mops.twse.com.tw/nas/t21/${type[market]}/t21sc03_${suffix}.html`;

const response = await this.httpService.get(url, { responseType: 'arraybuffer' });
const page = iconv.decode(response.data, 'big5');
if (page.toString().includes('查無資料')) return null;

const $ = cheerio.load(page);

const data = $('tr [align=right]')
Expand All @@ -55,9 +61,9 @@ export class MopsScraper extends Scraper {
const revenue = numeral(td.eq(2).text().trim()).value();
return { symbol, name, revenue, year, month };
})
.toArray();
.toArray() as Record<string, any>[];

return _.sortBy(data, 'symbol');
return symbol ? data.find(data => data.symbol === symbol) : _.sortBy(data, 'symbol');
}
}

Expand Down
66 changes: 66 additions & 0 deletions src/scrapers/scraper-factory.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { TwseScraper } from './twse-scraper';
import { TpexScraper } from './tpex-scraper';
import { TaifexScraper } from './taifex-scraper';
import { TdccScraper } from './tdcc-scraper';
import { MisScraper } from './mis-scraper';
import { MopsScraper } from './mops-scraper';
import { IsinScraper } from './isin-scraper';
import { Scraper } from './scraper';
import { Scraper as ScraperType } from '../enums';
import { RateLimitOptions } from '../interfaces';

/**
 * Builds scraper instances on demand and caches one instance per scraper type.
 *
 * Every scraper created by a factory receives the same optional rate-limit
 * options that were passed to the factory's constructor, and repeated requests
 * for the same type return the same instance.
 */
export class ScraperFactory {
  /** Scrapers already constructed by this factory, keyed by scraper type. */
  private readonly scrapers = new Map<ScraperType, Scraper>();

  /**
   * @param options Optional rate-limit settings forwarded to each scraper's constructor.
   */
  constructor(private readonly options?: RateLimitOptions) {}

  /**
   * Returns the scraper for `type`, constructing and caching it on first use.
   *
   * @param type Which scraper implementation to return.
   * @returns The cached scraper instance for `type`.
   * @throws {TypeError} If `type` is not a known scraper type (possible for
   *   untyped callers that bypass the enum).
   */
  get(type: ScraperType): Scraper {
    const cached = this.scrapers.get(type);
    if (cached) return cached;

    const scraper = this.create(type);
    this.scrapers.set(type, scraper);
    return scraper;
  }

  /** Returns the cached `TwseScraper`, creating it if needed. */
  getTwseScraper() {
    return this.get(ScraperType.Twse) as TwseScraper;
  }

  /** Returns the cached `TpexScraper`, creating it if needed. */
  getTpexScraper() {
    return this.get(ScraperType.Tpex) as TpexScraper;
  }

  /** Returns the cached `TaifexScraper`, creating it if needed. */
  getTaifexScraper() {
    return this.get(ScraperType.Taifex) as TaifexScraper;
  }

  /** Returns the cached `TdccScraper`, creating it if needed. */
  getTdccScraper() {
    return this.get(ScraperType.Tdcc) as TdccScraper;
  }

  /** Returns the cached `MisScraper`, creating it if needed. */
  getMisScraper() {
    return this.get(ScraperType.Mis) as MisScraper;
  }

  /** Returns the cached `MopsScraper`, creating it if needed. */
  getMopsScraper() {
    return this.get(ScraperType.Mops) as MopsScraper;
  }

  /** Returns the cached `IsinScraper`, creating it if needed. */
  getIsinScraper() {
    return this.get(ScraperType.Isin) as IsinScraper;
  }

  /**
   * Constructs a new scraper of the requested type with this factory's options.
   *
   * A switch is used instead of indexing a lookup object so that an unexpected
   * key can never resolve to an inherited property and so that an unknown type
   * fails with a descriptive error.
   */
  private create(type: ScraperType): Scraper {
    switch (type) {
      case ScraperType.Twse:
        return new TwseScraper(this.options);
      case ScraperType.Tpex:
        return new TpexScraper(this.options);
      case ScraperType.Taifex:
        return new TaifexScraper(this.options);
      case ScraperType.Tdcc:
        return new TdccScraper(this.options);
      case ScraperType.Mis:
        return new MisScraper(this.options);
      case ScraperType.Mops:
        return new MopsScraper(this.options);
      case ScraperType.Isin:
        return new IsinScraper(this.options);
      default:
        // Only reachable when a caller passes a value outside the enum at runtime.
        throw new TypeError(`Unknown scraper type: ${String(type)}`);
    }
  }
}
9 changes: 7 additions & 2 deletions src/scrapers/scraper.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import axios, { AxiosInstance } from 'axios';
import * as rateLimit from 'axios-rate-limit';
import { RateLimitOptions } from '../interfaces';

/**
 * Abstract base class for scrapers. It owns the HTTP client that concrete
 * scrapers use to issue their requests.
 */
export abstract class Scraper {
/** HTTP client available to subclasses. */
protected readonly httpService: AxiosInstance;

constructor() {
this.httpService = axios.create();
/**
 * Creates the HTTP client wrapped by `axios-rate-limit`.
 *
 * @param options Optional rate-limit settings; `limit` is passed as
 *   `maxRequests` (default 3) and `ttl` as `perMilliseconds` (default 5000).
 */
constructor(options?: RateLimitOptions) {
const maxRequests = options?.limit ?? 3;
const perMilliseconds = options?.ttl ?? 5000;
// NOTE(review): the suppression below presumably works around the typing of the
// namespace-style import of `axios-rate-limit` — confirm before removing.
// @ts-ignore
this.httpService = rateLimit(axios.create(), { maxRequests, perMilliseconds });
}
}
Loading

0 comments on commit f885817

Please sign in to comment.