1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
#include "afio_pch.hpp"
#include <iostream>
#include <fstream>
#include <regex>
#include <chrono>
#if BOOST_AFIO_USE_BOOST_FILESYSTEM
#include "boost/filesystem/fstream.hpp"
#endif
/* My Intel Core i7 3770K running Windows 8 x64 with 7200rpm drive, using
Sysinternals RAMMap to clear disc cache (http://technet.microsoft.com/en-us/sysinternals/ff700229.aspx)
Single threaded, warm cache:
92 files matched out of 39279 files which was 7093894518 bytes.
The search took 8.32834 seconds which was 4716.3 files per second or 812.318 Mb/sec.
Single threaded, cold cache:
91 files matched out of 38967 files which was 6170927489 bytes.
The search took 369.046 seconds which was 105.588 files per second or 15.9467 Mb/sec.
OpenMP, warm cache:
92 files matched out of 38943 files which was 7092655881 bytes.
The search took 3.73611 seconds which was 10423.4 files per second or 1810.46 Mb/sec.
OpenMP, cold cache:
91 files matched out of 38886 files which was 6170656567 bytes.
The search took 741.131 seconds which was 52.4684 files per second or 7.94029 Mb/sec.
*/
//[find_in_files_iostreams
int main(int argc, const char *argv[])
{
using namespace std;
namespace filesystem = boost::afio::filesystem;
#if BOOST_AFIO_USE_BOOST_FILESYSTEM
using boost::filesystem::ifstream;
#endif
typedef chrono::duration<double, ratio<1, 1>> secs_type;
if(argc<2)
{
cerr << "ERROR: Specify a regular expression to search all files in the current directory." << endl;
return 1;
}
// Prime SpeedStep
auto begin=chrono::high_resolution_clock::now();
while(chrono::duration_cast<secs_type>(chrono::high_resolution_clock::now()-begin).count()<1);
size_t bytesread=0, filesread=0, filesmatched=0;
try
{
begin=chrono::high_resolution_clock::now();
// Generate a list of all files here and below the current working directory
vector<filesystem::path> filepaths;
for(auto it=filesystem::recursive_directory_iterator("."); it!=filesystem::recursive_directory_iterator(); ++it)
{
if(it->status().type()!=
#ifdef BOOST_AFIO_USE_LEGACY_FILESYSTEM_SEMANTICS
filesystem::file_type::regular_file)
#else
filesystem::file_type::regular)
#endif
continue;
filepaths.push_back(it->path());
}
// Compile the regular expression, and have OpenMP parallelise the loop
regex regexpr(argv[1]);
#pragma omp parallel for schedule(dynamic)
for(int n=0; n<(int) filepaths.size(); n++)
{
// Open the file
ifstream s(filepaths[n], ifstream::binary);
s.exceptions(fstream::failbit | fstream::badbit); // Turn on exception throwing
// Get its length
s.seekg(0, ios::end);
size_t length=(size_t) s.tellg();
s.seekg(0, ios::beg);
// Allocate a sufficient buffer, avoiding the byte clearing vector would do
unique_ptr<char[]> buffer(new char[length+1]);
// Read in the file, terminating with zero
s.read(buffer.get(), length);
buffer.get()[length]=0;
// Search the buffer for the regular expression
if(regex_search(buffer.get(), regexpr))
{
#pragma omp critical
{
cout << filepaths[n] << endl;
}
filesmatched++;
}
filesread++;
bytesread+=length;
}
auto end=chrono::high_resolution_clock::now();
auto diff=chrono::duration_cast<secs_type>(end-begin);
cout << "\n" << filesmatched << " files matched out of " << filesread << " files which was "
<< bytesread << " bytes." << endl;
cout << "The search took " << diff.count() << " seconds which was " << filesread/diff.count()
<< " files per second or " << (bytesread/diff.count()/1024/1024) << " Mb/sec." << endl;
}
catch(...)
{
cerr << boost::current_exception_diagnostic_information(true) << endl;
return 1;
}
return 0;
}
//]
|