diff options
author | Kenneth Heafield <github@kheafield.com> | 2017-04-02 22:21:38 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2017-04-02 22:21:38 +0300 |
commit | f293fd958293a18984510d4d4e447658c02ab61d (patch) | |
tree | a5694769c372d5ec83badb4870a2511f6be78667 /util | |
parent | e6a600c8ac4062a9e6644f91232d3ba09469c4f4 (diff) |
Use std::find to locate newline.
Benchmark on lofn, counting lines in the output of:
xzcat /fs/vali0/www/data.statmt.org/ngrams/deduped_en/en.00.xz | head -n 10000000
With std::find
real 0m0.709s
user 0m0.648s
sys 0m0.060s
Baseline (hand-written loop, before this commit)
real 0m0.899s
user 0m0.820s
sys 0m0.076s
C++ standard library getline, for comparison:
int main() {
std::ios::sync_with_stdio(false);
std::string line;
while (getline(std::cin, line)) {}
}
real 0m0.803s
user 0m0.536s
sys 0m0.264s
Diffstat (limited to 'util')
-rw-r--r-- | util/file_piece.cc | 21 |
1 file changed, 10 insertions, 11 deletions
diff --git a/util/file_piece.cc b/util/file_piece.cc
index b6cb8c9..8f6278f 100644
--- a/util/file_piece.cc
+++ b/util/file_piece.cc
@@ -73,17 +73,16 @@ FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buf
 StringPiece FilePiece::ReadLine(char delim, bool strip_cr) {
   std::size_t skip = 0;
   while (true) {
-    for (const char *i = position_ + skip; i < position_end_; ++i) {
-      if (*i == delim) {
-        // End of line.
-        // Take 1 byte off the end if it's an unwanted carriage return.
-        const std::size_t subtract_cr = (
-            (strip_cr && i > position_ && *(i - 1) == '\r') ?
-            1 : 0);
-        StringPiece ret(position_, i - position_ - subtract_cr);
-        position_ = i + 1;
-        return ret;
-      }
+    const char *i = std::find(position_ + skip, position_end_, delim);
+    if (UTIL_LIKELY(i != position_end_)) {
+      // End of line.
+      // Take 1 byte off the end if it's an unwanted carriage return.
+      const std::size_t subtract_cr = (
+          (strip_cr && i > position_ && *(i - 1) == '\r') ?
+          1 : 0);
+      StringPiece ret(position_, i - position_ - subtract_cr);
+      position_ = i + 1;
+      return ret;
     }
     if (at_end_) {
       if (position_ == position_end_) {