From 3788ba511f4d38ec326b297a9efccabe4fa509d6 Mon Sep 17 00:00:00 2001 From: Simon L Date: Fri, 22 Nov 2024 23:56:47 +0200 Subject: [PATCH 1/3] fetch extra data for mizrahi transactions --- src/scrapers/mizrahi.ts | 96 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 91 insertions(+), 5 deletions(-) diff --git a/src/scrapers/mizrahi.ts b/src/scrapers/mizrahi.ts index a50d6832..03940673 100644 --- a/src/scrapers/mizrahi.ts +++ b/src/scrapers/mizrahi.ts @@ -4,7 +4,7 @@ import { SHEKEL_CURRENCY } from '../constants'; import { pageEvalAll, waitUntilElementDisappear, waitUntilElementFound, waitUntilIframeFound, } from '../helpers/elements-interactions'; -import { fetchPostWithinPage } from '../helpers/fetch'; +import { fetchPost, fetchPostWithinPage } from '../helpers/fetch'; import { waitForUrl } from '../helpers/navigation'; import { type Transaction, @@ -20,6 +20,13 @@ interface ScrapedTransaction { MC02SchumEZ: number; MC02AsmahtaMekoritEZ: string; MC02TnuaTeurEZ: string; + MC02KodGoremEZ: string; + MC02SugTnuaKaspitEZ: string; + MC02AgidEZ: string; + MC02ErehTaaEZ: string; + MC02SeifMaralEZ: string; + MC02NoseMaralEZ: string; + TransactionNumber: string; } interface ScrapedTransactionsResult { @@ -37,6 +44,27 @@ interface ScrapedTransactionsResult { }; } +interface ExtraTransactionDetail { + Label: string; + Value: string; +} + +interface ExtraTransactionResult { + body: { + fields: [ + [ + { + Records: [ + { + Fields: ExtraTransactionDetail[]; + }, + ]; + }, + ], + ]; + }; +} + const BASE_WEBSITE_URL = 'https://www.mizrahi-tefahot.co.il'; const LOGIN_URL = `${BASE_WEBSITE_URL}/login/index.html#/auth-page-he`; const BASE_APP_URL = 'https://mto.mizrahi-tefahot.co.il'; @@ -47,6 +75,7 @@ const TRANSACTIONS_REQUEST_URLS = [ `${BASE_APP_URL}/OnlinePilot/api/SkyOSH/get428Index`, `${BASE_APP_URL}/Online/api/SkyOSH/get428Index`, ]; +const TRANSACTION_DETAILS_REQUEST_URL = `${BASE_APP_URL}/Online/api/OSH/getMaherBerurimSMF`; const PENDING_TRANSACTIONS_PAGE = 
'/osh/legacy/legacy-Osh-p420'; const PENDING_TRANSACTIONS_IFRAME = 'p420.aspx'; const CHANGE_PASSWORD_URL = /https:\/\/www\.mizrahi-tefahot\.co\.il\/login\/index\.html#\/change-pass/; @@ -130,6 +159,56 @@ function convertTransactions(txns: ScrapedTransaction[]): Transaction[] { }); } +async function getTransactionExtraScrap(record: ScrapedTransaction, headers: Headers): Promise { + const formattedPeulaDate = moment(record.MC02PeulaTaaEZ).format(DATE_FORMAT); + // const formattedErechDate = moment(record.MC02ErehTaaEZ).format(DATE_FORMAT); + const data = { + inKodGorem: record.MC02KodGoremEZ, + inAsmachta: record.MC02AsmahtaMekoritEZ, + inSchum: record.MC02SchumEZ, + inNakvanit: record.MC02KodGoremEZ, + inSugTnua: record.MC02SugTnuaKaspitEZ, + inAgid: record.MC02AgidEZ, + inTarPeulaFormatted: formattedPeulaDate, + inTarErechFormatted: formattedPeulaDate, + inKodNose: record.MC02SeifMaralEZ, + inKodTatNose: record.MC02NoseMaralEZ, + inTransactionNumber: record.TransactionNumber, + }; + + const res = await fetchPost(TRANSACTION_DETAILS_REQUEST_URL, data, headers); + return res; +} + +function simplifyExtraTransactionResultsToMemo(extraResult: ExtraTransactionResult): string { + let memo = ''; + extraResult.body.fields.forEach(field => + field?.forEach(group => + group?.Records.forEach(record => + record?.Fields.forEach((fieldRecord) => { + memo += `${fieldRecord.Label} ${fieldRecord.Value}; `; + }), + ), + ), + ); + return memo; +} + +async function getExtraScrap(originalRecords: ScrapedTransaction[], currentTxns: Transaction[], headers: Headers): Promise { + const promises = Object.values(originalRecords) + .map(async (record) => getTransactionExtraScrap(record, headers)); + const accounts = await Promise.all(promises); + const txnsWithExtra = currentTxns.map((txn, i) => { + const extraDetails = accounts[i]; + const currentTxn = { ...txn }; + if (extraDetails) { + currentTxn.memo = simplifyExtraTransactionResultsToMemo(extraDetails); + } + return currentTxn; + }); + 
return txnsWithExtra; +} + async function extractPendingTransactions(page: Frame): Promise { const pendingTxn = await pageEvalAll(page, 'tr.rgRow', [], (trs) => { return trs.map((tr) => Array.from(tr.querySelectorAll('td'), (td: HTMLTableDataCellElement) => td.textContent || '')); @@ -228,14 +307,18 @@ class MizrahiScraper extends BaseScraperWithBrowser throw new Error('Account number not found'); } + const headersMap: Record = {}; const response = await Promise.any(TRANSACTIONS_REQUEST_URLS.map(async (url) => { const request = await this.page.waitForRequest(url); const data = createDataFromRequest(request, this.options.startDate); - const headers = createHeadersFromRequest(request); + headersMap[url] = createHeadersFromRequest(request); - return fetchPostWithinPage(this.page, url, data, headers); + return fetchPostWithinPage(this.page, url, data, headersMap[url]); })); - + const cookies = await this.page.cookies(); + const headers = Object.values(headersMap)[0]; + headers.Cookie = cookies.map((cookie) => `${cookie.name}=${cookie.value}`).join('; '); + if (!response || response.header.success === false) { throw new Error(`Error fetching transaction. Response message: ${response ? response.header.messages[0].text : ''}`); } @@ -243,9 +326,12 @@ class MizrahiScraper extends BaseScraperWithBrowser const relevantRows = response.body.table.rows.filter((row) => row.RecTypeSpecified); const oshTxn = convertTransactions(relevantRows); + const oshTxnWithExtra = this.options.additionalTransactionInformation ? 
+ await getExtraScrap(relevantRows, oshTxn, headers) : oshTxn; + // workaround for a bug which the bank's API returns transactions before the requested start date const startMoment = getStartMoment(this.options.startDate); - const oshTxnAfterStartDate = oshTxn.filter((txn) => moment(txn.date).isSameOrAfter(startMoment)); + const oshTxnAfterStartDate = oshTxnWithExtra.filter((txn) => moment(txn.date).isSameOrAfter(startMoment)); const pendingTxn = await this.getPendingTransactions(); const allTxn = oshTxnAfterStartDate.concat(pendingTxn); From 10e9efabba93d58fac8488a6e3742685144a1fc9 Mon Sep 17 00:00:00 2001 From: Simon L Date: Sat, 23 Nov 2024 00:09:38 +0200 Subject: [PATCH 2/3] using MC02ErehTaaEZ fails to retrieve data as opposed to using MC02PeulaTaaEZ in both date fields --- src/scrapers/mizrahi.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/scrapers/mizrahi.ts b/src/scrapers/mizrahi.ts index 03940673..7d662648 100644 --- a/src/scrapers/mizrahi.ts +++ b/src/scrapers/mizrahi.ts @@ -23,7 +23,6 @@ interface ScrapedTransaction { MC02KodGoremEZ: string; MC02SugTnuaKaspitEZ: string; MC02AgidEZ: string; - MC02ErehTaaEZ: string; MC02SeifMaralEZ: string; MC02NoseMaralEZ: string; TransactionNumber: string; @@ -161,7 +160,6 @@ function convertTransactions(txns: ScrapedTransaction[]): Transaction[] { async function getTransactionExtraScrap(record: ScrapedTransaction, headers: Headers): Promise { const formattedPeulaDate = moment(record.MC02PeulaTaaEZ).format(DATE_FORMAT); - // const formattedErechDate = moment(record.MC02ErehTaaEZ).format(DATE_FORMAT); const data = { inKodGorem: record.MC02KodGoremEZ, inAsmachta: record.MC02AsmahtaMekoritEZ, From 2b9db83846db787144b68dc4b91889fbaa6cdaa1 Mon Sep 17 00:00:00 2001 From: Simon L Date: Sat, 23 Nov 2024 17:22:41 +0200 Subject: [PATCH 3/3] configurable concurrency for transaction extras --- src/scrapers/mizrahi.ts | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git 
a/src/scrapers/mizrahi.ts b/src/scrapers/mizrahi.ts index 7d662648..25b27b0b 100644 --- a/src/scrapers/mizrahi.ts +++ b/src/scrapers/mizrahi.ts @@ -13,6 +13,7 @@ import { } from '../transactions'; import { BaseScraperWithBrowser, LoginResults, type PossibleLoginResults } from './base-scraper-with-browser'; import { ScraperErrorTypes } from './errors'; +import { sleep } from '../helpers/waiting'; interface ScrapedTransaction { RecTypeSpecified: boolean; @@ -25,6 +26,7 @@ interface ScrapedTransaction { MC02AgidEZ: string; MC02SeifMaralEZ: string; MC02NoseMaralEZ: string; + MC02ShowDetailsEZ: string; TransactionNumber: string; } @@ -80,6 +82,8 @@ const PENDING_TRANSACTIONS_IFRAME = 'p420.aspx'; const CHANGE_PASSWORD_URL = /https:\/\/www\.mizrahi-tefahot\.co\.il\/login\/index\.html#\/change-pass/; const DATE_FORMAT = 'DD/MM/YYYY'; const MAX_ROWS_PER_REQUEST = 10000000000; +const TRANSACTION_DETAILS_REQUEST_CONCURRENCY = 1; +const TRANSACTION_DETAILS_REQUEST_WAIT_TIME = 500; // ms const usernameSelector = '#emailDesktopHeb'; const passwordSelector = '#passwordIDDesktopHEB'; @@ -158,7 +162,7 @@ function convertTransactions(txns: ScrapedTransaction[]): Transaction[] { }); } -async function getTransactionExtraScrap(record: ScrapedTransaction, headers: Headers): Promise { +async function getTransactionExtraScrap(record: ScrapedTransaction, headers: Headers): Promise { const formattedPeulaDate = moment(record.MC02PeulaTaaEZ).format(DATE_FORMAT); const data = { inKodGorem: record.MC02KodGoremEZ, @@ -174,8 +178,13 @@ async function getTransactionExtraScrap(record: ScrapedTransaction, headers: Hea inTransactionNumber: record.TransactionNumber, }; - const res = await fetchPost(TRANSACTION_DETAILS_REQUEST_URL, data, headers); - return res; + try { + const res = await fetchPost(TRANSACTION_DETAILS_REQUEST_URL, data, headers); + return res; + } catch (e) { + console.error(`Error fetching extra transaction details for record ${JSON.stringify(record)}`, e); + } + return null; } 
function simplifyExtraTransactionResultsToMemo(extraResult: ExtraTransactionResult): string { @@ -193,11 +202,21 @@ function simplifyExtraTransactionResultsToMemo(extraResult: ExtraTransactionResu } async function getExtraScrap(originalRecords: ScrapedTransaction[], currentTxns: Transaction[], headers: Headers): Promise { - const promises = Object.values(originalRecords) - .map(async (record) => getTransactionExtraScrap(record, headers)); - const accounts = await Promise.all(promises); + const recordsWithDetails = originalRecords + .map((record, index) => ({ record, index })) + .filter(({ record }) => record.MC02ShowDetailsEZ === '1'); + + const promises = recordsWithDetails.map(({ record }) => getTransactionExtraScrap(record, headers)); + let accounts: Array = []; + while (promises.length > 0) { + const currentPromises = promises.splice(0, TRANSACTION_DETAILS_REQUEST_CONCURRENCY); + accounts = accounts.concat(await Promise.all(currentPromises)); + await sleep(TRANSACTION_DETAILS_REQUEST_WAIT_TIME); + } + const txnsWithExtra = currentTxns.map((txn, i) => { - const extraDetails = accounts[i]; + const extraDetailIndex = recordsWithDetails.findIndex(({ index }) => index === i); + const extraDetails = extraDetailIndex !== -1 ? accounts[extraDetailIndex] : undefined; const currentTxn = { ...txn }; if (extraDetails) { currentTxn.memo = simplifyExtraTransactionResultsToMemo(extraDetails);