From 213cd39b6db387bd88f12589fd50ff0e6563cf56 Mon Sep 17 00:00:00 2001 From: Aras Pranckevicius Date: Sun, 17 Apr 2022 22:07:43 +0300 Subject: OBJ: further optimize, cleanup and harden the new C++ importer Continued improvements to the new C++ based OBJ importer. Performance: about 2x faster. - Rungholt.obj (several meshes, 263MB file): Windows 12.7s -> 5.9s, Mac 7.7s -> 3.1s. - Blender 3.0 splash (24k meshes, 2.4GB file): Windows 97.3s -> 53.6s, Mac 137.3s -> 80.0s. - "Windows" is VS2022, AMD Ryzen 5950X (32 threads), "Mac" is Xcode/clang 13, M1Max (10 threads). - Slightly reduced memory usage during import as well. The performance gains are a combination of several things: - Replacing `std::stof` / `std::stoi` with C++17 `from_chars`. - Stop reading input file char-by-char using `std::getline`, and instead read in 64kb chunks, and parse from there (taking care of possibly handling lines split mid-way due to chunk boundaries). - Removing abstractions for splitting a line by some char, - Avoid tiny memory allocations: instead of storing a vector of polygon corners in each face, store all the corners in one big array, and per-face only store indices "where do corners start, and how many". Likewise, don't store full string names of material/group names for each face; only store indices into overall material/group names arrays. - Stop always doing mesh validation, which is slow. Do it just like the Alembic importer does: only do validation if found some invalid faces during import, or if requested by the user via an import setting checkbox (which defaults to off). - Stop doing "collection sync" for each object being added; instead do the collection sync right after creating all the objects. Cleanup / Robustness: This reworking of parser (see "removing abstractions" point above) means that all the functions that were in `parser_string_utils` file are gone, and replaced with different set of functions. However they are not OBJ specific, so as pointed out during review of the previous differential, they are now in `source/blender/io/common` library. Added gtest coverage for said functions as well; something that was only indirectly covered by obj tests previously. Rework of some bits of parsing made the parser actually better able to deal with invalid syntax. E.g. previously, if a face corner were a `/123` string, it would have incorrectly treated that as a vertex index (since it would get "hey that's one number" after splitting a string by a slash), instead of properly marking it as invalid syntax. Added gtest coverage for .mtl parsing; something that was not covered by any tests at all previously. Reviewed By: Howard Trickey Differential Revision: https://developer.blender.org/D14586 --- source/blender/io/common/CMakeLists.txt | 5 + source/blender/io/common/IO_string_utils.hh | 69 ++++++++++++ source/blender/io/common/intern/string_utils.cc | 99 +++++++++++++++++ .../blender/io/common/intern/string_utils_test.cc | 118 +++++++++++++++++++++ 4 files changed, 291 insertions(+) create mode 100644 source/blender/io/common/IO_string_utils.hh create mode 100644 source/blender/io/common/intern/string_utils.cc create mode 100644 source/blender/io/common/intern/string_utils_test.cc (limited to 'source/blender/io/common') diff --git a/source/blender/io/common/CMakeLists.txt b/source/blender/io/common/CMakeLists.txt index 02bd5b2b81f..b1add38bf01 100644 --- a/source/blender/io/common/CMakeLists.txt +++ b/source/blender/io/common/CMakeLists.txt @@ -7,6 +7,8 @@ set(INC ../../blenlib ../../depsgraph ../../makesdna + ../../../../intern/guardedalloc + ../../../../extern/fast_float ) set(INC_SYS @@ -17,9 +19,11 @@ set(SRC intern/dupli_parent_finder.cc intern/dupli_persistent_id.cc intern/object_identifier.cc + intern/string_utils.cc IO_abstract_hierarchy_iterator.h IO_dupli_persistent_id.hh + IO_string_utils.hh IO_types.h intern/dupli_parent_finder.hh ) @@ -38,6 +42,7 @@ if(WITH_GTESTS) intern/abstract_hierarchy_iterator_test.cc intern/hierarchy_context_order_test.cc intern/object_identifier_test.cc + intern/string_utils_test.cc ) set(TEST_INC ../../blenloader diff --git a/source/blender/io/common/IO_string_utils.hh b/source/blender/io/common/IO_string_utils.hh new file mode 100644 index 00000000000..25f1f01c6ed --- /dev/null +++ b/source/blender/io/common/IO_string_utils.hh @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#include "BLI_string_ref.hh" + +/* + * Various text parsing utilities commonly used by text-based input formats. + */ + +namespace blender::io { + +/** + * Fetches next line from an input string buffer. + * + * The returned line will not have '\n' characters at the end; + * the `buffer` is modified to contain remaining text without + * the input line. + * + * Note that backslash (\) character is treated as a line + * continuation, similar to OBJ file format or a C preprocessor. + */ +StringRef read_next_line(StringRef &buffer); + +/** + * Drop leading white-space from a StringRef. + * Note that backslash character is considered white-space. + */ +StringRef drop_whitespace(StringRef str); + +/** + * Drop leading non-white-space from a StringRef. + * Note that backslash character is considered white-space. + */ +StringRef drop_non_whitespace(StringRef str); + +/** + * Parse an integer from an input string. + * The parsed result is stored in `dst`. The function skips + * leading white-space unless `skip_space=false`. If the + * number can't be parsed (invalid syntax, out of range), + * `fallback` value is stored instead. + * + * Returns the remainder of the input string after parsing. + */ +StringRef parse_int(StringRef str, int fallback, int &dst, bool skip_space = true); + +/** + * Parse a float from an input string. + * The parsed result is stored in `dst`. The function skips + * leading white-space unless `skip_space=false`. If the + * number can't be parsed (invalid syntax, out of range), + * `fallback` value is stored instead. + * + * Returns the remainder of the input string after parsing. + */ +StringRef parse_float(StringRef str, float fallback, float &dst, bool skip_space = true); + +/** + * Parse a number of white-space separated floats from an input string. + * The parsed `count` numbers are stored in `dst`. If a + * number can't be parsed (invalid syntax, out of range), + * `fallback` value is stored instead. + * + * Returns the remainder of the input string after parsing. + */ +StringRef parse_floats(StringRef str, float fallback, float *dst, int count); + +} // namespace blender::io diff --git a/source/blender/io/common/intern/string_utils.cc b/source/blender/io/common/intern/string_utils.cc new file mode 100644 index 00000000000..01107b0866e --- /dev/null +++ b/source/blender/io/common/intern/string_utils.cc @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "IO_string_utils.hh" + +/* Note: we could use C++17 from_chars to parse + * floats, but even if some compilers claim full support, + * their standard libraries are not quite there yet. + * LLVM/libc++ only has a float parser since LLVM 14, + * and gcc/libstdc++ since 11.1. So until at least these are + * the mininum spec, use an external library. */ +#include "fast_float.h" +#include + +namespace blender::io { + +StringRef read_next_line(StringRef &buffer) +{ + const char *start = buffer.begin(); + const char *end = buffer.end(); + size_t len = 0; + char prev = 0; + const char *ptr = start; + while (ptr < end) { + char c = *ptr++; + if (c == '\n' && prev != '\\') { + break; + } + prev = c; + ++len; + } + + buffer = StringRef(ptr, end); + return StringRef(start, len); +} + +static bool is_whitespace(char c) +{ + return c <= ' ' || c == '\\'; +} + +StringRef drop_whitespace(StringRef str) +{ + while (!str.is_empty() && is_whitespace(str[0])) { + str = str.drop_prefix(1); + } + return str; +} + +StringRef drop_non_whitespace(StringRef str) +{ + while (!str.is_empty() && !is_whitespace(str[0])) { + str = str.drop_prefix(1); + } + return str; +} + +static StringRef drop_plus(StringRef str) +{ + if (!str.is_empty() && str[0] == '+') { + str = str.drop_prefix(1); + } + return str; +} + +StringRef parse_float(StringRef str, float fallback, float &dst, bool skip_space) +{ + if (skip_space) { + str = drop_whitespace(str); + } + str = drop_plus(str); + fast_float::from_chars_result res = fast_float::from_chars(str.begin(), str.end(), dst); + if (res.ec == std::errc::invalid_argument || res.ec == std::errc::result_out_of_range) { + dst = fallback; + } + return StringRef(res.ptr, str.end()); +} + +StringRef parse_floats(StringRef str, float fallback, float *dst, int count) +{ + for (int i = 0; i < count; ++i) { + str = parse_float(str, fallback, dst[i]); + } + return str; +} + +StringRef parse_int(StringRef str, int fallback, int &dst, bool skip_space) +{ + if (skip_space) { + str = drop_whitespace(str); + } + str = drop_plus(str); + std::from_chars_result res = std::from_chars(str.begin(), str.end(), dst); + if (res.ec == std::errc::invalid_argument || res.ec == std::errc::result_out_of_range) { + dst = fallback; + } + return StringRef(res.ptr, str.end()); +} + +} // namespace blender::io diff --git a/source/blender/io/common/intern/string_utils_test.cc b/source/blender/io/common/intern/string_utils_test.cc new file mode 100644 index 00000000000..a78bd7ab8a3 --- /dev/null +++ b/source/blender/io/common/intern/string_utils_test.cc @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "IO_string_utils.hh" + +#include "testing/testing.h" + +namespace blender::io { + +#define EXPECT_STRREF_EQ(str1, str2) EXPECT_STREQ(str1, std::string(str2).c_str()) + +TEST(string_utils, read_next_line) +{ + std::string str = "abc\n \n\nline with \\\ncontinuation\nCRLF ending:\r\na"; + StringRef s = str; + EXPECT_STRREF_EQ("abc", read_next_line(s)); + EXPECT_STRREF_EQ(" ", read_next_line(s)); + EXPECT_STRREF_EQ("", read_next_line(s)); + EXPECT_STRREF_EQ("line with \\\ncontinuation", read_next_line(s)); + EXPECT_STRREF_EQ("CRLF ending:\r", read_next_line(s)); + EXPECT_STRREF_EQ("a", read_next_line(s)); + EXPECT_TRUE(s.is_empty()); +} + +TEST(string_utils, drop_whitespace) +{ + /* Empty */ + EXPECT_STRREF_EQ("", drop_whitespace("")); + /* Only whitespace */ + EXPECT_STRREF_EQ("", drop_whitespace(" ")); + EXPECT_STRREF_EQ("", drop_whitespace(" ")); + EXPECT_STRREF_EQ("", drop_whitespace(" \t\n\r ")); + /* Drops leading whitespace */ + EXPECT_STRREF_EQ("a", drop_whitespace(" a")); + EXPECT_STRREF_EQ("a b", drop_whitespace(" a b")); + EXPECT_STRREF_EQ("a b ", drop_whitespace(" a b ")); + /* No leading whitespace */ + EXPECT_STRREF_EQ("c", drop_whitespace("c")); + /* Case with backslash, should be treated as whitespace */ + EXPECT_STRREF_EQ("d", drop_whitespace(" \\ d")); +} + +TEST(string_utils, parse_int_valid) +{ + std::string str = "1 -10 \t 1234 1234567890 +7 123a"; + StringRef s = str; + int val; + s = parse_int(s, 0, val); + EXPECT_EQ(1, val); + s = parse_int(s, 0, val); + EXPECT_EQ(-10, val); + s = parse_int(s, 0, val); + EXPECT_EQ(1234, val); + s = parse_int(s, 0, val); + EXPECT_EQ(1234567890, val); + s = parse_int(s, 0, val); + EXPECT_EQ(7, val); + s = parse_int(s, 0, val); + EXPECT_EQ(123, val); + EXPECT_STRREF_EQ("a", s); +} + +TEST(string_utils, parse_int_invalid) +{ + int val; + /* Invalid syntax */ + EXPECT_STRREF_EQ("--123", parse_int("--123", -1, val)); + EXPECT_EQ(val, -1); + EXPECT_STRREF_EQ("foobar", parse_int("foobar", -2, val)); + EXPECT_EQ(val, -2); + /* Out of integer range */ + EXPECT_STRREF_EQ(" a", parse_int("1234567890123 a", -3, val)); + EXPECT_EQ(val, -3); + /* Has leading white-space when we don't expect it */ + EXPECT_STRREF_EQ(" 1", parse_int(" 1", -4, val, false)); + EXPECT_EQ(val, -4); +} + +TEST(string_utils, parse_float_valid) +{ + std::string str = "1 -10 123.5 -17.125 0.1 1e6 50.0e-1"; + StringRef s = str; + float val; + s = parse_float(s, 0, val); + EXPECT_EQ(1.0f, val); + s = parse_float(s, 0, val); + EXPECT_EQ(-10.0f, val); + s = parse_float(s, 0, val); + EXPECT_EQ(123.5f, val); + s = parse_float(s, 0, val); + EXPECT_EQ(-17.125f, val); + s = parse_float(s, 0, val); + EXPECT_EQ(0.1f, val); + s = parse_float(s, 0, val); + EXPECT_EQ(1.0e6f, val); + s = parse_float(s, 0, val); + EXPECT_EQ(5.0f, val); + EXPECT_TRUE(s.is_empty()); +} + +TEST(string_utils, parse_float_invalid) +{ + float val; + /* Invalid syntax */ + EXPECT_STRREF_EQ("_0", parse_float("_0", -1.0f, val)); + EXPECT_EQ(val, -1.0f); + EXPECT_STRREF_EQ("..5", parse_float("..5", -2.0f, val)); + EXPECT_EQ(val, -2.0f); + /* Out of float range. Current float parser (fast_float) + * clamps out of range numbers to +/- infinity, so this + * one gets a +inf instead of fallback -3.0. */ + EXPECT_STRREF_EQ(" a", parse_float("9.0e500 a", -3.0f, val)); + EXPECT_EQ(val, std::numeric_limits::infinity()); + /* Has leading white-space when we don't expect it */ + EXPECT_STRREF_EQ(" 1", parse_float(" 1", -4.0f, val, false)); + EXPECT_EQ(val, -4.0f); +} + +} // namespace blender::io -- cgit v1.2.3