-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.h
39 lines (33 loc) · 921 Bytes
/
index.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#include <cctype>
#include <fstream>
#include <iostream>
#include <regex>
#include <set>
#include <string>
#include <vector>
using namespace std;
/// Set of lower-cased titles loaded from a colon-delimited index file.
///
/// A line contributes a title only if it contains at least two ':' characters;
/// everything after the second ':' is the title, so titles may themselves
/// contain ':'. The fields before it are ignored (presumably
/// `offset:page_id:title` as in Wikipedia multistream index dumps -- confirm
/// against the data file). Titles are stored lower-cased, so lookup() expects
/// an already lower-cased query.
class Index {
    std::set<std::string> index;

    /// Lower-cases `text` in place, one byte at a time.
    static void to_lower_in_place(std::string &text) {
        for (std::string::size_type i = 0; i < text.size(); ++i) {
            // std::tolower requires a value representable as unsigned char;
            // passing a negative plain char (any UTF-8 byte >= 0x80 where char
            // is signed) is undefined behaviour.
            text[i] = static_cast<char>(
                std::tolower(static_cast<unsigned char>(text[i])));
        }
    }

    /// Reads `path` line by line and inserts the title of every usable line.
    /// If the file cannot be opened, reports it on stderr and leaves the
    /// index empty.
    void load(const std::string &path) {
        std::ifstream in(path.c_str());
        if (!in) {
            // Looping on !eof() here would never terminate: a failed open sets
            // failbit, not eofbit.
            std::cerr << "Index: cannot open '" << path << "'" << std::endl;
            return;
        }

        int current = 0;  // lines accepted so far (duplicate titles included)
        std::string line;
        // Testing getline's result stops cleanly at end of file and on read
        // errors, and never processes a phantom empty line after the last one.
        while (std::getline(in, line)) {
            const std::string::size_type first = line.find(':');
            if (first == std::string::npos)
                continue;
            const std::string::size_type second = line.find(':', first + 1);
            if (second == std::string::npos)
                continue;

            std::string title = line.substr(second + 1);
            to_lower_in_place(title);
            index.insert(title);

            ++current;
            // Report only when the counter has just reached a new multiple, so
            // the same message is not repeated for skipped lines.
            if (current % 1000000 == 0)
                std::cout << "Index loaded: " << current << std::endl;
        }
        std::cout << "Index loaded!" << std::endl;
    }

public:
    /// Loads the index from the default dump file in the working directory.
    Index() { load("enwiki-20190501-pages-articles-multistream-index.txt"); }

    /// Loads the index from the file at `path`.
    explicit Index(const std::string &path) { load(path); }

    /// Returns true if `s` exactly matches a stored (lower-cased) title.
    bool lookup(const std::string &s) const { return index.find(s) != index.end(); }
};