-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsearch-huge-file-for.cpp
56 lines (44 loc) · 1.37 KB
/
search-huge-file-for.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include <chrono>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <mutex>
#include <string>
#include <string_view>
#include <thread>
// Number of bytes requested from the file per read (1 MiB).
const uint64_t BLOCK_SIZE = 1048576;
using namespace std;

// Searches FILE for the first occurrence of SEARCH_STRING and prints its byte
// offset, followed by up to BLOCK_SIZE bytes of the file starting at the match.
//
// Usage: <program> SEARCH_STRING FILE
//
// The file is read in BLOCK_SIZE chunks. The last (search.size() - 1) bytes of
// every chunk are carried over to the front of the next one, so a match that
// straddles a chunk boundary is still found. Only bytes actually delivered by
// the stream are searched, so leftovers from an earlier chunk can never
// produce a false match.
//
// Returns 1 on bad usage, when the file cannot be opened, or on a read error;
// returns 0 otherwise (whether or not the string was found).
int main(int argc, char *argv[]) {
    if (argc != 3) {
        cout << "Really really really fast search for a fixed string in a huge file, gives byte offset" << endl;
        cout << "Usage: " << argv[0] << " SEARCH_STRING FILE" << endl;
        return 1;
    }
    const string search = argv[1];
    const string filename = argv[2];
    cout << "Searching for " << search << " in " << filename << endl;

    ifstream file(filename, ios::binary);
    if (!file) {
        // Without this check a missing file would never reach EOF and the
        // read loop would spin forever.
        cerr << "Could not open " << filename << endl;
        return 1;
    }

    const uint64_t GIGABYTE = 1000000000;

    // Bytes kept from the tail of one chunk so that a match spanning two
    // chunks is seen in full once the next chunk arrives.
    const size_t overlap = search.empty() ? 0 : search.size() - 1;
    string buffer(overlap + BLOCK_SIZE, ' ');

    size_t carried = 0;              // valid bytes at the front of buffer from the previous chunk
    uint64_t base = 0;               // file offset that corresponds to buffer[0]
    uint64_t consumed = 0;           // total bytes read from the file so far
    uint64_t next_report = GIGABYTE; // progress is printed each time this is crossed

    while (file) {
        file.read(&buffer[carried], static_cast<streamsize>(BLOCK_SIZE));
        const size_t got = static_cast<size_t>(file.gcount());
        if (got == 0) {
            break;
        }

        // Restrict the search to the carried bytes plus what was just read.
        const size_t valid = carried + got;
        const size_t pos = string_view(buffer.data(), valid).find(search);
        if (pos != string_view::npos) {
            const uint64_t offset = base + pos;

            // Reuse the already-open binary stream; clear() drops the eof/fail
            // bits a short final read may have set so that seekg works.
            file.clear();
            file.seekg(static_cast<streamoff>(offset));
            file.read(&buffer[0], static_cast<streamsize>(BLOCK_SIZE));
            const streamsize context_len = file.gcount();

            cout << "Found at offset " << offset << endl;
            cout << endl;
            cout << "Context is: " << endl;
            // Print only the bytes that were actually read back.
            cout.write(buffer.data(), context_len);
            cout << endl << endl;
            return 0;
        }

        consumed += got;
        if (consumed >= next_report) {
            cout << "Searched " << consumed / GIGABYTE << " GB" << endl;
            next_report += GIGABYTE;
        }

        // Slide the tail of the searched region to the front of the buffer.
        // char_traits::move tolerates overlapping source and destination.
        const size_t keep = valid < overlap ? valid : overlap;
        char_traits<char>::move(&buffer[0], &buffer[valid - keep], keep);
        base += valid - keep;
        carried = keep;
    }

    if (file.bad()) {
        cerr << "Error while reading " << filename << endl;
        return 1;
    }

    cout << "Not found" << endl;
    return 0;
}