// generate.js
// Crawls the site named by the URL environment variable and logs each page URL.
import Crawler from "js-crawler";
// Start address of the crawl, taken from the environment. Set it in the
// terminal before running, e.g.: export URL="https://XXXXX.com/"
// Note: at module scope this binding shadows the global `URL` class.
const { URL } = process.env;

// Running total of pages handled so far; the crawl callbacks increment it.
let urlsCrawledCount = 0;
/**
 * Tells whether a discovered link belongs to the site being crawled.
 *
 * A link qualifies only when it begins with the base URL. A plain substring
 * check would also accept foreign pages that merely embed the base URL
 * somewhere in their address (share links, redirectors, archive mirrors).
 *
 * @param {string} url - Absolute URL of the link under consideration.
 * @param {string} [baseUrl=URL] - Prefix the link must start with; defaults
 *   to the module-level URL constant.
 * @returns {boolean} True when `url` starts with a non-empty `baseUrl`.
 */
function isSubredditUrl(url, baseUrl = URL) {
  // An unset base would be coerced to the text "undefined", and an empty one
  // would match every link, so neither is allowed to match anything.
  if (typeof baseUrl !== "string" || baseUrl === "") {
    return false;
  }
  return url.startsWith(baseUrl);
}
// Crawler instance limited to links accepted by isSubredditUrl, plus the
// start URL itself.
const crawler = new Crawler().configure({
  // Consulted with each candidate link; only links for which this returns
  // true are followed.
  shouldCrawl(url) {
    return isSubredditUrl(url) || url === URL;
  },
  // Request throttling. A gentler setting is possible too: 0.1 would mean
  // at most one request every 10 seconds.
  maxRequestsPerSecond: 2,
  maxConcurrentRequests: 3,
  // Link depth limit passed to js-crawler.
  depth: 3,
});
// Fail fast with a clear message when the start URL was never provided;
// otherwise the crawl would be started with `undefined`.
if (typeof URL !== "string" || URL === "") {
  throw new Error(
    'Environment variable URL is not set; run e.g. export URL="https://XXXXX.com/" first',
  );
}

// Start crawling at URL. The three callbacks handle, in order: a fetched
// page, a failed fetch, and the end of the whole crawl.
crawler.crawl(
  URL,
  function onSuccess(page) {
    console.log(page.url);
    urlsCrawledCount++;
    // Progress marker after every 100 counted pages.
    if (urlsCrawledCount % 100 === 0) {
      console.log(`Generated: ${urlsCrawledCount}`);
    }
  },
  function onFailure(page) {
    console.log(`ERROR Fetch(${page.url}) status = ${page.status}`);
    // Failed fetches are included in the running total as well.
    urlsCrawledCount++;
  },
  function onAllFinished() {
    console.log("All generating finished");
    console.log(`Urls generated = ${urlsCrawledCount}`);
  },
);