Merge branch 'master' into nccl

author: Marcin Junczys-Dowmunt <marcinjd@microsoft.com> 2018-12-07 00:21:25 +0300
committer: Marcin Junczys-Dowmunt <marcinjd@microsoft.com> 2018-12-07 00:21:25 +0300
commit: 1b2968c8b9465ce2225f304f5deea7f642f3e533 (patch)
tree: 3dc8fd194c28a635ac57dea9951ba74f9c21cef6
parent: 9562338ff78e226caad84ac29aa0be4e8b344368 (diff)
parent: e78d805955a5613e91cc3f2af1db2776a6c6e3da (diff)
69 files changed, 7821 insertions, 862 deletions
diff --git a/.gitignore b/.gitignore
index 2931bf0a..80080441 100755
--- a/.gitignore
+++ b/.gitignore
@@ -62,5 +62,3 @@ examples/mnist/*ubyte
 .vs
 .vscode
 
-# SentencePiece is automatically downloaded when requested
-src/3rd_party/sentencepiece/
diff --git a/.gitmodules b/.gitmodules
index 903659e7..623b7060 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "examples"]
 	path = examples
 	url = https://github.com/marian-nmt/marian-examples
+[submodule "src/3rd_party/sentencepiece"]
+	path = src/3rd_party/sentencepiece
+	url = https://github.com/marian-nmt/sentencepiece
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce5fb3a4..a2c2e48d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,9 +7,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+### Fixed
+- Errors due to warnings
+
+### Changed
+- Set nearly all warnings as errors for Marian's own targets. Disable warnings for 3rd party.
+
+## [1.7.0] - 2018-11-27
+
 ### Added
 - Word alignment generation in scorer
 - Attention output generation in decoder and scorer with `--alignment soft`
+- Support for SentencePiece vocabularies and run-time segmentation/desegmentation
+- Support for SentencePiece vocabulary training during model training
+- Group training files by filename when creating vocabularies for joint vocabularies
+- Updated examples
+- Synchronous multi-node training (early version)
 
 ### Fixed
 - Delayed output in line-by-line translation
@@ -17,6 +30,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ### Changed
 - Generated word alignments include alignments for target EOS tokens
 - Boost::program_options has been replaced by another CLI library
+- Replace boost::filesystem with Pathie
 - Expansion of unambiguous command-line arguments is no longer supported
 
 ## [1.6.0] - 2018-08-08
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1293d39a..c585b9f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,11 @@
 cmake_minimum_required(VERSION 3.5.1)
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
+if (POLICY CMP0074)
+  cmake_policy(SET CMP0074 NEW) # CMake 3.12
+endif ()
+
+
 project(marian CXX C)
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -38,33 +43,33 @@ if(MSVC)
 
   set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental")
 else()
-  set(CMAKE_CXX_FLAGS                 " -std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} -msse4.1 -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC -Wno-unused-result -Wno-deprecated -Werror -Wno-pragmas")
+  set(DISABLE_GLOBALLY "-Wno-unused-result")
+
+  # These are used in src/CMakeLists.txt on a per-target basis
+  list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function;
+                           -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers)
+
+  # This warning does not exist prior to gcc 5.0
+  if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
+    list(APPEND ALL_WARNINGS -Wsuggest-override)
+  endif()
+
+  set(CMAKE_CXX_FLAGS                 "-std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} -msse4.1 -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC ${DISABLE_GLOBALLY}")
   set(CMAKE_CXX_FLAGS_RELEASE         "${CMAKE_CXX_FLAGS} -g -rdynamic")
-  set(CMAKE_CXX_FLAGS_DEBUG           " -std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Werror -Wno-pragmas")
+  set(CMAKE_CXX_FLAGS_DEBUG           "-std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Werror -Wno-pragmas")
   set(CMAKE_CXX_FLAGS_SLIM            "${CMAKE_CXX_FLAGS} -DNDEBUG")
-  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO  "${CMAKE_CXX_FLAGS_RELEASE} -pg -g -rdynamic -Wall -Wextra -Wsuggest-override -Wno-unused-value -Wno-unknown-pragmas -Wno-sign-compare -Wno-missing-field-initializers")
+  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO  "${CMAKE_CXX_FLAGS} -g -rdynamic")
   set(CMAKE_CXX_FLAGS_PROFILE         "${CMAKE_CXX_FLAGS_RELEASE} -pg -g -rdynamic")
   set(CMAKE_CXX_FLAGS_PROFGEN         "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction")
   set(CMAKE_CXX_FLAGS_PROFUSE         "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
-endif()
+  endif()
 
 # Downloading SentencePiece if requested and set to compile with it.
 # Requires all the dependencies imposed by SentencePiece
 if(USE_SENTENCEPIECE)
-  message(STATUS "Using SentencePiece from our fork https://github.com/marian-nmt/sentencepiece.git")
-  if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/src/3rd_party/sentencepiece)
-    execute_process(COMMAND git clone https://github.com/marian-nmt/sentencepiece.git
-                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/3rd_party
-                    RESULT_VARIABLE git_result
-                    ERROR_QUIET)
-    message(STATUS "Downloaded SentencePiece [code: ${git_result}]")
-  else()
-    message(STATUS "It seems that SentencePiece has already been downloaded. Reusing.")
-  endif()
-
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_SENTENCEPIECE")
   LIST(APPEND CUDA_NVCC_FLAGS -DUSE_SENTENCEPIECE; )
-  set(EXT_LIBS ${EXT_LIBS} sentencepiece)
+  set(EXT_LIBS ${EXT_LIBS} sentencepiece sentencepiece_train)
 endif()
 
 
@@ -121,6 +126,7 @@ else(CMAKE_BUILD_TYPE STREQUAL "Debug")
   list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O3; -g; --use_fast_math; -arch=sm_30; -gencode=arch=compute_30,code=sm_30; -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61; -gencode=arch=compute_61,code=compute_61 ;)
 endif(CMAKE_BUILD_TYPE STREQUAL "Debug")
 if(NOT MSVC)
+  # @TODO: add warnings here too
   list(APPEND CUDA_NVCC_FLAGS -std=c++11; -Xcompiler\ -fPIC; -Xcompiler\ -Wno-unused-result; -Xcompiler\ -Wno-deprecated; -Xcompiler\ -Wno-pragmas; -Xcompiler\ -Wno-unused-value; -Xcompiler\ -Werror;)
 else()
   list(APPEND CUDA_NVCC_FLAGS -Xcompiler\ /FS; )
diff --git a/VERSION b/VERSION
index d4f6e2c5..a97fc441 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v1.6.2
+v1.7.1
diff --git a/examples b/examples
-Subproject 8c6f4ef6859ef224dbc7ff891884bf7050d718c
+Subproject 336740065d9c23e53e912a1befff18981d9d27a
diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt
index faf37527..f7eabf54 100644
--- a/src/3rd_party/CMakeLists.txt
+++ b/src/3rd_party/CMakeLists.txt
@@ -3,9 +3,9 @@ include_directories(.)
 
 add_subdirectory(./yaml-cpp)
 add_subdirectory(./SQLiteCpp)
+add_subdirectory(./pathie-cpp)
 
 if(USE_SENTENCEPIECE)
-
   if(USE_STATIC_LIBS)
     set(_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
     if(WIN32)
@@ -29,9 +29,8 @@ if(USE_SENTENCEPIECE)
   if(USE_STATIC_LIBS)
     set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
   endif()
-
 endif(USE_SENTENCEPIECE)
 
 include_directories(./SQLiteCpp/include)
 include_directories(./CLI)
-
+include_directories(./pathie-cpp/include)
diff --git a/src/3rd_party/pathie-cpp/CHANGELOG b/src/3rd_party/pathie-cpp/CHANGELOG
new file mode 100644
index 00000000..52942338
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/CHANGELOG
@@ -0,0 +1,52 @@
+-- Version 0.1.0 (2017-10-28) --
+
+* Add Pathie::Tempdir and Pathie::Tempfile classes for
+  creating temporary directories and files.
+* Add Pathie::entry_iterator, Path::begin_entries(), and
+  Path::end_entries(). These allow you to use real C++
+  iterators for working with directory entries.
+* Change Path::find() to take a callback instead of std::function to
+  make it compile under C++98.
+* Add Path::operator/=.
+* Add `const' qualifier to Path::fopen() and Path::touch() as these
+  methods leave the path itself unchanged.
+* Add C++98 compatibility (instead of just C++11).
+* Mark stream replacements as experimental. They are mostly untested
+  and I don't really use them.
+* Add PATHIE_BUILD_STREAM_REPLACEMENTS for building the stream
+  replacements.
+* Rename build option ASSUME_UTF8_ON_UNIX to
+  PATHIE_ASSUME_UTF8_ON_UNIX.
+* Switch license from GPL3 to BSD-2clause.
+* Drop support for expanding "~username/foo" constructs. This
+  nonstandard extension was unportable and caused problems when
+  linking Pathie statically.
+* Add Path::utf8_str() method.
+* Restructure header #include order. Pathie now requires you
+  to specify the exact header to include (e.g. <pathie/path.hpp>)
+  instead of one global header. There was no point in having the
+  stream replacements included if not required.
+* Fix compilation problem with _PATHIE_UNIX not being defined
+* Do not include <windows.h> in Pathie public headers. This caused
+  problems in some circumstances when a certain macro combination
+  of windows.h was needed.
+* Remove config.hpp. This caused confusion when the library was used.
+  Build configuration now only happens via command-line options.
+* Drop shaky support for NTFS symlinks. It never worked really well
+  anyway.
+
+-- Version 0.0.3 (2015-04-30) --
+
+* Don't use CMake's global configuration variables, allowing pathie to
+  be built as a subproject.
+* Fix compilation error on systems that do not automatically
+  #include <stdexcept>.
+
+-- Version 0.0.2 (2015-02-16) --
+
+* Fix installation error on config.hpp
+* Add message that C++11 is required for compilation
+
+-- Version 0.0.1 (2015-02-13) --
+
+First public release.
diff --git a/src/3rd_party/pathie-cpp/CMakeLists.txt b/src/3rd_party/pathie-cpp/CMakeLists.txt
new file mode 100644
index 00000000..db5744f5
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories(..)  # parent directory (src/3rd_party)
+include_directories(.)  # this directory (pathie-cpp)
+include_directories(include)  # pathie-cpp's own headers
+
+FILE(GLOB PathieCppSources src/*.cpp)  # every .cpp directly under src/ (non-recursive); evaluated at configure time
+if (NOT TARGET pathie-cpp)  # define the target only if it does not exist yet
+  add_library(pathie-cpp OBJECT ${PathieCppSources})  # OBJECT library: sources are compiled but not archived or linked here
+endif()
diff --git a/src/3rd_party/pathie-cpp/LICENSE b/src/3rd_party/pathie-cpp/LICENSE
new file mode 100644
index 00000000..f74dec43
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/LICENSE
@@ -0,0 +1,24 @@
+Copyright © 2015, 2017 Marvin Gülker
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+“AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/3rd_party/pathie-cpp/README.md b/src/3rd_party/pathie-cpp/README.md
new file mode 100644
index 00000000..80b68770
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/README.md
@@ -0,0 +1,359 @@
+PATHIE.
+=======
+
+This is the Pathie project. It aims to provide a C++ library that covers
+all needs of pathname manipulation and filename fiddling, without
+having to worry about the underlying platform. That is, it is a glue
+library that allows you to create platform-independent filename
+handling code with special regard to Unicode path names.
+
+Supported systems
+-----------------
+
+Currently supported platforms are Linux and Windows, the latter via
+MSYS2 GCC. Any other compiler or system might or might not work. Mac
+OS should work as well, but I cannot test this due to lack of a Mac. I
+gladly accept contributions for any system or compiler.
+
+Pathie's source code itself is written conforming to C++98. On UNIX
+systems, it assumes the system supports POSIX.1-2001. On Windows
+systems, the minimum supported Windows version is Windows Vista.
+
+Installation
+------------
+
+See INSTALL.md.
+
+The library
+-----------
+
+The entire world is using UTF-8 as the primary Unicode encoding. The
+entire world? No, a little company from Redmond resists the temptation
+and instead uses UTF-16LE, causing cross-platform handling of Unicode
+paths to be a nightmare.
+
+One of the main problems the author ran into was compiler-dependent
+code that was not marked as such. Many sites on the Internet claim
+Unicode path handling on Windows is easy, but in fact, it only is if
+you define “development for Windows” as “development with MSVC”,
+Microsoft’s proprietary C/C++ compiler, which provides nonstandard
+interfaces to allow for handling UTF-16LE filenames. The Pathie
+library has been developed with a focus on MinGW and crosscompilation
+from Linux to Windows and thus does not suffer from this problem.
+
+The Pathie library has been developed to release the programmer from
+the burden of handling the different encodings in use for filenames,
+and does so by focusing its API on UTF-8 regardless of the platform in
+use. Thus, if you use UTF-8 as your preferred encoding inside your
+program (take a look at the [UTF8 Everywhere
+website](http://www.utf8everywhere.org) for reasons why you should do
+that), Pathie will be of the most use for you, since it transparently
+converts whatever filesystem encoding is encountered to UTF-8 in its
+public interface. Likewise, any pathname you pass to the library is
+assumed to be UTF-8 and is transcoded transparently to the filesystem
+encoding before invoking the respective OS' filesystem access
+methods. Of course, explicit conversion functions are also provided,
+in case you do need a string in the native encoding or need to
+construct a path from a string in the native encoding.
+
+General Usage
+-------------
+
+First thing is to include the main header:
+
+~~~~~~~~~~~~~~~~~~{.cpp}
+#include <pathie/path.hpp>
+~~~~~~~~~~~~~~~~~~
+
+Now consider the simple task to get all children of a directory, which
+have Unicode filenames. Doing that manually will result in you having
+to convert between UTF-8 and UTF-16 all the time. With pathie, you can
+just do this:
+
+~~~~~~~~~~~~~~~~~~~{.cpp}
+std::vector<Pathie::Path> children = your_path.children();
+~~~~~~~~~~~~~~~~~~~
+
+Done. Retrieving the parent directory of your directory is pretty easy:
+
+~~~~~~~~~~~~~~~~~~~{.cpp}
+Pathie::Path yourpath("foo/bar/baz");
+Pathie::Path parent = yourpath.parent();
+~~~~~~~~~~~~~~~~~~~
+
+But Pathie is much more than just an abstraction of different filepath
+encodings. It is a utility library for pathname manipulation, i.e. it
+allows you to do things like finding the parent directory, expanding
+relative to absolute paths, decomposing a filename into basename,
+dirname, and extension, and so on. See the documentation of the
+central Pathie::Path class on what you can do.
+
+~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+// Assume current directory is /tmp
+Pathie::Path p("foo/bar/../baz");
+p.expand(); // => /tmp/foo/baz
+~~~~~~~~~~~~~~~~~~~~~~
+
+Or my personal favourite:
+
+~~~~~~~~~~~~~~~~~~~{.cpp}
+Pathie::Path p1("/tmp/foo/bar");
+Pathie::Path p2("/tmp/bar/foo");
+Pathie::Path p3 = p1.relative(p2); // => ../../foo/bar
+~~~~~~~~~~~~~~~~~~~
+
+It also provides you with commonly used paths like the user’s
+configuration directory or the path to the running executable.
+
+~~~~~~~~~~~~~~~~~~~~{.cpp}
+Pathie::Path configdir  = Pathie::Path::config_dir();
+Pathie::Path exepath    = Pathie::Path::exe();
+~~~~~~~~~~~~~~~~~~~~
+
+Pathie assumes that all string arguments passed are in UTF-8 and
+transparently converts to the native filesystem encoding internally.
+
+Still, if you interface directly with the Windows API or other external
+libraries, you might want to retrieve the native representation from a
+Path or construct a Path from the native representation. Pathie
+doesn’t want to be in your way then. The following example constructs
+from and converts to the native representation on Windows, which is
+UTF-16LE:
+
+~~~~~~~~~~~~~~~~~~~~{.cpp}
+// Construct from native
+wchar_t* utf16 = Win32ApiCall();
+Path mypath = Path::from_native(utf16); // also accepts std::wstring
+
+// Retrieve native (Note C++’ish std::wstring rather than
+// raw wchar_t* on Windows)
+std::wstring native_utf16 = mypath.native();
+~~~~~~~~~~~~~~~~~~~~
+
+On UNIX, these methods work with normal strings (std::string instead
+of std::wstring) in the underlying filesystem encoding. In most cases,
+that will be UTF-8, but some legacy systems may still use something
+like ISO-8859-1 in which case that will differ.
+
+### Temporary files and directories
+
+There are two classes `Pathie::Tempdir` and `Pathie::Tempfile` that
+you can use if you need to work with temporary files or directories,
+respectively. Constructing instances of these classes creates a
+temporary entry, which is removed (recursively in case of directories)
+when the instance is destroyed again. Use TempEntry::path() to get
+access to the Path instance pointing to the created entry.
+
+~~~~~~~~~~~~~~~~~~~~{.cpp}
+#include <pathie/tempdir.hpp>
+
+//...
+
+{
+  srand(time(NULL)); // Needs random number generator
+  Pathie::Tempdir tmpdir("foo"); // Pass a fragment to use as part of filename
+  std::cout << "Temporary dir is: " << tmpdir.path() << std::endl;
+}
+// When `tmpdir' is destroyed, the destructor recursively
+// deletes the directory that was created.
+~~~~~~~~~~~~~~~~~~~~
+
+### Opening a file with a Unicode path name
+
+On Windows with GCC, it is [not possible to open a file with Unicode
+pathname](https://stackoverflow.com/questions/821873) via C++'s usual
+`std::ifstream` and `std::ofstream` mechanism. There's a nonstandard
+extension provided by Microsoft's proprietary compiler that does this,
+but GCC does not have this extension. Consequently, code that is
+intended to compile on GCC (like Pathie) has to avoid it.
+
+There *is* however a function in the Win32API that allows you to open a
+file with a Unicode pathname *and* that returns a standard C `FILE*`
+handle,
+[_wfopen()](http://msdn.microsoft.com/en-us/library/yeby3zcb.aspx). The
+method Path::fopen() uses this function on Windows and a regular C
+`fopen()` on all other platforms, thus allowing you to just deal with
+your Unicode filename via the regular C I/O interface. If you urgently
+need C++ I/O streams, read on.
+
+### Stream replacements
+
+Pathie mainly provides you with the means to handle paths, compose,
+and decompose them. There is an experimental feature however that
+provides replacements for C++ file streams that work with instances of
+Pathie::Path instead of strings for opening a file. These replacements
+are neither elegant nor portable, because they don't nicely honour the
+template concept the STL is based on by directly subclassing the
+standard streams in the manner needed most frequently and additionally
+relying on vendor-specific details. For GCC, an internal (but at least
+documented) interface is used to exchange the file descriptor inside a
+stream, and for MSVC, a nonstandard (but documented) constructor is
+used. Other compilers are not supported by this feature (which most
+notably affects clang, where I have no idea on the interfaces I need
+to use for such a trick).
+
+In one word, these replacements are hacky and I consider them
+experimental. If that does not strike you as problematic, you can
+enable this feature by passing `-DPATHIE_BUILD_STREAM_REPLACEMENTS=ON`
+when invoking `cmake` during the build process.
+
+In order to use the replacements, include the respective header
+(either `pathie_ifstream` or `pathie_ofstream`) and use the
+`Pathie::ifstream` and `Pathie::ofstream` classes just like you would
+use `std::ifstream` and `std::ofstream`, with the only difference
+being that you construct them from a Pathie::Path instance instead of
+a string. See the documentation of Pathie::ofstream for more
+information.
+
+~~~~~~~~~~~~~~~~~{.cpp}
+#include <pathie/pathie_ofstream>
+
+// ...
+
+Pathie::Path p("Bärenstark.txt");
+Pathie::ofstream file(p);
+file << "Some content" << std::endl;
+file.close();
+~~~~~~~~~~~~~~~~~
+
+There's also the unofficial
+[boost::nowide](http://cppcms.com/files/nowide/html/), which is
+similar to this feature and maybe more reliable. It has [recently been
+accepted into
+boost](https://lists.boost.org/boost-announce/2017/06/0516.php).
+
+Dependencies and linking
+------------------------
+
+Pathie is standalone, that is, it requires no other libraries except
+for those provided by your operating system. Note that there’s a
+caveat with this on Windows, which does provide the `Shlwapi` library
+by default, but MinGW's GCC does not automatically link it in. Be sure
+to link to this library explicitly when compiling for MinGW Windows
+by appending `-lShlwapi` to the end of your linking command line.
+
+It is recommended to link in pathie as a dynamic library, because
+there are some problems with it when linked statically on certain
+operating systems (see _Caveats_ below). If you are sure you aren’t
+affected by those problems, it is possible to link in pathie
+statically.
+
+Caveats
+-------
+
+This library assumes that under all UNIX systems out there (I also
+consider Mac OSX to be a UNIX system) the file system root always is
+`/` and the directory separator also always is `/`. This structure is
+mandatory as per POSIX -- in POSIX.1-2008, it’s specified in section
+10.1. Systems which do neither follow POSIX directory structure, nor
+are Windows, are unsupported.
+
+On POSIX-compliant systems other than Mac OS X, the filesystem
+encoding [generally is
+unspecified](https://unix.stackexchange.com/questions/2089/what-charset-encoding-is-used-for-filenames-and-paths-on-linux).
+Pathnames are merely byte blobs which do not contain NUL bytes, and
+components are separated by `/`. It’s up to the applications,
+including utilities like a shell or the ls(1) program, to make
+something of those byte streams. Therefore, it is perfectly possible
+that on one system, user A uses ISO-8859-1 filenames and user B uses
+UTF-8 filenames. Even the same user could use differently encoded
+filenames. Programs that have to interpret the byte blobs in pathnames
+on these systems look at the locale environment variables, namely
+`LANG` and `LC_ALL`, see section 7 of POSIX.1-2008. As a consequence,
+it may happen you want to create filenames with characters not
+supported in the user’s pathname encoding. For example, if you want to
+create a file with a hebrew filename and the user’s pathname encoding
+is ISO-8859-1, there’s a problem, because ISO-8859-1 has no hebrew
+characters in it, but in UTF-8, which is the encoding you are advised
+to use and which is what Pathie’s API expects from you, they are
+available. There is no sensible solution to this problem that the
+Pathie library could dictate; the `iconv()` function used by pathie
+just replaces characters that are unavailable in the target encoding
+with a system-defined default (probably “?”). Note that on systems
+which have a Unicode pathname encoding, especially modern Linuxes with
+UTF-8, such a situation can’t ever arise, because the Unicode
+encodings (UTF-*) cover all characters you can ever use.
+
+At least on FreeBSD, calling the POSIX `iconv()` function fails with
+the cryptic error message “Service unavailable” if a program is linked
+statically. I’ve reported [a bug on
+this](https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=196567). This
+means that you currently can’t link in pathie statically on FreeBSD
+and systems which don’t allow statically linked executables to call
+`iconv()`.
+
+On Linux systems, it is recommended to set your program’s locale to the
+environment’s locale before you call any functions the Pathie library
+provides, because this will allow Pathie to use the correct encoding
+for filenames. This is relevant where the environment’s encoding is
+not UTF-8, e.g. with $LANG set to `de_DE.ISO-8859-1`. You can do this
+as follows (the `""` locale always refers to the locale of the
+environment):
+
+~~~~~~~~~~~~~~~~~~~~~{.cpp}
+#include <locale>
+std::locale::global(std::locale(""));
+~~~~~~~~~~~~~~~~~~~~~
+
+This is not required on Windows nor on Mac OS X, because these
+operating systems always use UTF-16LE (Windows) or UTF-8 (Mac OS X) as
+the filesystem encoding, regardless of the user's locale. It however
+does not hurt to call this either, it simply makes no difference for
+Pathie on these systems. If you urgently need to avoid this call on
+Linux, you need to compile pathie with the special build option
+PATHIE_ASSUME_UTF8_ON_UNIX, which will force Pathie to assume that
+UTF-8 is used as the filesystem encoding under any UNIX-based system.
+
+Links
+-----
+
+* Project page: https://www.guelkerdev.de/projects/pathie/
+* GitHub mirror: https://github.com/Quintus/pathie-cpp
+* Issue tracker: https://github.com/Quintus/pathie-cpp/issues
+
+Contributing
+------------
+
+Feel free to submit any contributions you deem useful. Try to make
+separate branches for your new features, give a description on what
+you changed, etc.
+
+Don’t you duplicate boost::filesystem?
+-------------------------------------
+
+Yes and
+no. [boost::filesystem](http://www.boost.org/doc/libs/1_56_0/libs/filesystem/doc/index.htm)
+provides many methods pathie provides, but has a major problem with
+Unicode path handling if you are not willing to do the UTF-8/UTF-16
+conversion manually. boost::filesystem always uses UTF-8 to store the
+paths on UNIX, and, which is the problem, always uses UTF-16LE to
+store the paths on a Windows system. There is no way to override
+this, although there is a [hidden documentation
+page](http://www.boost.org/doc/libs/1_51_0/libs/locale/doc/html/default_encoding_under_windows.html)
+that claims to solve the problem. I have wasted a great amount of time
+to persuade boost::filesystem to automatically convert all
+`std::string` input it receives into UTF-16LE, but failed to
+succeed. Each time I wanted to create a file with a Unicode filename,
+the test failed on Windows by producing garbage filenames. Finally I
+found out that the neat trick shown in the documentation above indeed
+does work -- but only if you use the Microsoft Visual C++ compiler
+(MSVC) to compile your code. I don’t, I generally use g++ via the
+[MinGW](http://www.mingw.org) toolchain. boost::filesystem fails with
+g++ via MinGW with regard to Unicode filenames on Windows as of this
+writing (September 2014).
+
+Apart from that, pathie provides some additional methods, especially
+with regard to finding out where the user’s paths are. It is modelled
+after Ruby’s popular
+[Pathname](http://ruby-doc.org/stdlib-2.1.2/libdoc/pathname/rdoc/Pathname.html#method-i-rmtree)
+class, but it doesn’t entirely duplicate its interface (which wouldn’t
+be idiomatic C++).
+
+Also, pathie is a small library. Adding it to your project shouldn’t
+hurt too much, while boost::filesystem is quite a large dependency.
+
+License
+-------
+
+Pathie is BSD-licensed; see the file “LICENSE” for the exact license
+conditions.
diff --git a/src/3rd_party/pathie-cpp/include/entry_iterator.hpp b/src/3rd_party/pathie-cpp/include/entry_iterator.hpp
new file mode 100644
index 00000000..85a53b1f
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/include/entry_iterator.hpp
@@ -0,0 +1,119 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PATHIE_ENTRY_ITERATOR_HPP
+#define PATHIE_ENTRY_ITERATOR_HPP
+#include <iterator>
+
+namespace Pathie {
+
+  class Path;
+
+  /**
+   * An iterator class for reading the entries in a directory.
+   * Note that the entries of a directory always include the
+   * "." (current directory) and ".." (parent directory) entries
+   * unresolved, and that the order in which the entries in the
+   * directory are returned is undefined (actually, the order
+   * depends on the filesystem used).
+   *
+   * The iterators of this class are always const. You cannot change
+   * the values referenced.
+   *
+   * It is unspecified behaviour what happens if a directory entry is
+   * added or removed to/from the directory while you are iterating
+   * it. Thus, keep iterations short in time.
+   *
+   * Instances of this class wrap an ephemeral handle like for example
+   * a directory descriptor on Linux. This handle is not copiable,
+   * which should normally mean that instances of this class cannot be
+   * copied. However, the `std::iterator` interface mandates that
+   * iterator instances are copiable (see "Requirements" here:
+   * <http://en.cppreference.com/w/cpp/concept/Iterator>) and in fact
+   * the language copies iterators all the time if you use them for
+   * example in a for loop. Consequently, this class implements the
+   * copy constructor and the copy assignment. However, these operations
+   * do *not* actually copy the instance, but instead *move* the content
+   * from the source instance to the target instance. The source instance
+   * is afterwards unusable and looks like a finished iterator. The
+   * `const` qualifiers in the copy operations are explicitly cast
+   * away inside the functions to allow this, so they don't mean anything
+   * for them. This works fairly nice for the ordinary use case (where
+   * the language creates implicit copies), but the API may look as if
+   * copying instances is allowed. It is not. *Do not copy* instances of
+   * this class even though it looks as if it's possible. Implicit
+   * copies automatically done by C++ as in for loops are okay, but
+   * that's it. That is, you *can* do this:
+   *
+   * ~~~~{.cpp}
+   *   entry_iterator iter;
+   *   for(iter=my_path.begin_entries(); iter != my_path.end_entries(); iter++) {
+   *     // Work with iter...
+   *   }
+   * ~~~~
+   *
+   * But you *cannot* do this:
+   *
+   * ~~~~{.cpp}
+   * entry_iterator iter=my_path.begin_entries();
+   * entry_iterator iter2(iter);
+   * ~~~~
+   *
+   * This example does compile, but `iter` will be unusable after
+   * `iter2` has been constructed.
+   */
+  class entry_iterator: public std::iterator<std::input_iterator_tag, Path, int> // Input iterator over Path values; difference type is int.
+  {
+  public:
+    entry_iterator(); // NOTE(review): presumably constructs an already-finished iterator -- confirm in the implementation.
+    entry_iterator(const Path* p_top); // NOTE(review): presumably starts reading the entries of directory `p_top` -- confirm.
+    ~entry_iterator();
+    entry_iterator& operator=(const Path* p_top); // Restart assignment
+    operator bool() const; // NOTE(review): presumably false once iteration has finished -- confirm.
+    bool operator==(const entry_iterator& other) const;
+    bool operator!=(const entry_iterator& other) const;
+    entry_iterator& operator++(int); // Postfix form; note that it returns a reference rather than a copy of the previous state.
+    entry_iterator& operator++(); // Prefix form.
+    const Path& operator*() const; // Read-only access; the referenced Path cannot be changed through the iterator.
+    const Path* operator->() const; // Read-only member access to the referenced Path.
+
+    // "Copy" operations that really move the content and leave `other` unusable, see class docs
+    entry_iterator(const entry_iterator& other);
+    entry_iterator& operator=(const entry_iterator& other);
+  private:
+    void open_native_handle(); // NOTE(review): presumably acquires the handle stored in mp_cur -- confirm.
+    void close_native_handle(); // NOTE(review): presumably releases the handle stored in mp_cur -- confirm.
+
+    const Path* mp_directory; ///< Path requested to read from.
+    void* mp_cur; ///< Native handle to the opened directory.
+    Path* mp_cur_path; ///< Path instance of the path pointed to by mp_cur (only a pointer to allow forward-declaration of Path).
+  };
+}
+
+#endif /* PATHIE_ENTRY_ITERATOR_HPP */
diff --git a/src/3rd_party/pathie-cpp/include/errors.hpp b/src/3rd_party/pathie-cpp/include/errors.hpp
new file mode 100644
index 00000000..d79fb3c3
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/include/errors.hpp
@@ -0,0 +1,119 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PATHIE_ERRORS_HPP
+#define PATHIE_ERRORS_HPP
+#include <exception>
+#include <string>
+#include <cstdlib>
+
+/* DWORD is typedef'ed from unsigned long, see
+ * <https://msdn.microsoft.com/en-us/library/cc230318.aspx>
+ * HRESULT is typedef'ed from LONG, which in turn is a typedef
+ * of long, see <https://msdn.microsoft.com/en-us/library/cc230330.aspx>.
+ * I spell the types out here in this header to avoid having to
+ * include windows.h, which might interfere with programmes using
+ * pathie that want to include windows.h themselves. */
+
+#include "pathie.hpp"
+
+namespace Pathie {
+
+  /// Base class for all exceptions in this library; derives from std::exception.
+  class PathieError: public std::exception {
+  public:
+    PathieError(); ///< Constructs a new instance.
+    PathieError(std::string message); ///< Constructs a new instance with the given what() message.
+    virtual ~PathieError() throw(); ///< Declared non-throwing via the `throw()` specification.
+
+    virtual const char* what() const throw(); ///< The error message.
+  protected:
+    std::string m_pathie_errmsg; ///< The error message given in the constructor.
+  };
+
+
+  /// This exception is thrown when a call to a C/system function results
+  /// in `errno` being set.
+  class ErrnoError: public PathieError {
+  public:
+    ErrnoError(int val); ///< Constructs a new instance from the given `errno` value.
+    virtual ~ErrnoError() throw();
+
+    inline int get_val(){return m_val;} ///< The `errno` value. Not a const member, so it cannot be called through a const reference.
+  private:
+    int m_val; ///< Value returned by get_val().
+  };
+
+#ifdef _WIN32
+
+  /// This exception is thrown only on Windows, when a call to the Win32API
+  /// fails.
+  /// The "unsigned long" type here is actually DWORD (which it is a
+  /// typedef of in Win32).
+  class WindowsError: public PathieError {
+  public:
+    WindowsError(unsigned long val); ///< Constructs a new instance from the given GetLastError() value.
+    virtual ~WindowsError() throw();
+
+    inline int get_val(){return m_val;} ///< The GetLastError() value. NOTE(review): returns `int` although m_val is `unsigned long`, so the value is implicitly converted on return.
+  private:
+    unsigned long m_val; ///< Value returned (after conversion to int) by get_val().
+  };
+
+  /// Similar to WindowsError, this exception is thrown when a HANDLE function
+  /// from the Win32API fails.
+  /// The "long" type here is actually HRESULT (which it is a typedef of in Win32).
+  class WindowsHresultError: public PathieError {
+  public:
+    WindowsHresultError(long value); ///< Constructs a new instance from the given handle function result.
+    virtual ~WindowsHresultError() throw();
+
+    inline long get_val(){return m_val;} ///< The handle function result.
+  private:
+    long m_val; ///< Value returned by get_val(); typed `long` to match the constructor argument and the getter (HRESULT), avoiding a round-trip through `int`.
+  };
+#endif
+
+#ifdef _PATHIE_UNIX
+
+  /// This exception is thrown only on UNIX, when a call to the POSIX glob(3)
+  /// function fails.
+  class GlobError: public PathieError {
+  public:
+    GlobError(int val); ///< Constructs a new instance from the given glob(3) error code.
+    virtual ~GlobError() throw();
+
+    inline int get_val(){return m_val;} ///< The glob(3) error code. Not a const member, so it cannot be called through a const reference.
+  private:
+    int m_val; ///< Value returned by get_val().
+  };
+#endif
+
+}
+#endif
diff --git a/src/3rd_party/pathie-cpp/include/path.hpp b/src/3rd_party/pathie-cpp/include/path.hpp
new file mode 100644
index 00000000..90729709
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/include/path.hpp
@@ -0,0 +1,377 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PATHIE_PATH_HPP
+#define PATHIE_PATH_HPP
+#include <string>
+#include <iostream>
+#include <vector>
+#include <sys/stat.h>
+
+#include "pathie.hpp"
+#include "entry_iterator.hpp"
+
+namespace Pathie {
+
+  // Forward-declare the encoding helpers (defined in pathie.cpp); the inline Path::from_native() overloads below call utf16_to_utf8() / filename_to_utf8().
+#if defined(_WIN32)
+  std::string utf16_to_utf8(std::wstring);
+  std::wstring utf8_to_utf16(std::string);
+#elif defined(_PATHIE_UNIX)
+  std::string utf8_to_filename(const std::string& utf8);
+  std::string filename_to_utf8(const std::string& native_filename);
+#endif
+
+  /**
+   * \brief Main class, describing paths.
+   *
+   * This class represents a single path on the filesystem.
+   * The path does not have to exist, but this class provides
+   * you with means to create it.
+   *
+   * Note on predefined directories
+   * ------------------------------
+   *
+   * This class provides a lot of methods for retrieving information about
+   * system and user predefined directories. Note however that the
+   * referenced directories may or may not exist.
+   *
+   * See the pathlist.md document for an overview of possible path
+   * return values.
+   *
+   * Note on XDG directories on UNIX
+   * -------------------------------
+   *
+   * Nowadays UNIX systems have adopted the Freedesktop.org
+   * XDG standards, and it is highly recommended to follow them
+   * when you write an application that stores user-specific data.
+   * XDG directories fall in two groups: Core data directories, covered
+   * by the main XDG specification, and user-dir directories, described
+   * in the documentation of the XDG user-dirs software. Directories of
+   * the first group are available today on all Linux systems, examples
+   * for them are ~/.config, ~/.local/share, and others. Directories
+   * of the latter group are typically found on desktop systems and
+   * are missing on servers, examples include ~/Documents and ~/Downloads.
+   *
+   * The following XDG specifications are followed:
+   *
+   * * XDG main specification: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+   * * XDG user-dirs specification: http://www.freedesktop.org/wiki/Software/xdg-user-dirs/
+   *
+   * Pathie is not a Shellscript parser, so it will fail if your XDG configuration
+   * files do not follow the usually found format. Especially no other variable
+   * substitution except from exactly one $HOME is understood.
+   *
+   * While the XDG specification for the core directories clearly says
+   * which directory to use if the administrator/user has not specified
+   * in his system configuration (by setting the appropriate environment
+   * variables), the user-dirs isn’t that easy. Or rather, it is, but not
+   * all desktop environments bother to follow it. The XDG user-dirs spec
+   * requires a file `~/.config/user-dirs.dirs` to exist, generated by the
+   * program xdg-user-dirs-update(1), which is run by all major desktop
+   * environments. Smaller ones don’t always do that, resulting in the file
+   * missing. The spec leaves open what should happen in such a case, i.e.
+   * it’s implementation-defined behaviour. I have chosen to return the
+   * user’s $HOME directory in such a case. The methods affected by this
+   * decision are the following ones:
+   *
+   * * documents_dir()
+   * * download_dir()
+   * * music_dir()
+   * * pictures_dir()
+   * * publicshare_dir()
+   * * templates_dir()
+   * * videos_dir()
+   *
+   * Other notes
+   * -----------
+   *
+   * On UNIX, this library follows the Filesystem Hierarchy Standard,
+   * version 2.3 (http://refspecs.linuxfoundation.org/FHS_2.3/fhs-2.3.html).
+   *
+   * On UNIX, the FHS defines a "normal" file hierarchy and a "local" one; for
+   * example, /usr/share is part of the "normal" file hierarchy, which is mirrored
+   * to the "local" one in /usr/local/share. The "local" hierarchy is intended to be
+   * used by programs that the system administrator manually installed without resorting
+   * to the system’s default package manager. Such a difference does not exist on Windows.
+   * Pathie allows you to decide yourself which information you want to query when calling
+   * one of the following functions:
+   *
+   * * global_mutable_data_dir()
+   * * global_immutable_data_dir()
+   * * global_config_dir()
+   * * global_cache_dir()
+   * * global_appentries_dir()
+   *
+   * Each of these functions takes an argument that allows you to specify whether
+   * you want the "local" or the "normal" hierarchy’s paths returned. The argument
+   * however is optional, and you can use the set_global_dir_default() method to
+   * specify what should happen if no argument is specified. By default, paths of
+   * the "local" hierarchy are returned. For example:
+   *
+   * ~~~~~~~~~~~~~~~~~~~ c++
+   * Path p1 = Path::global_immutable_data_dir(); // /usr/local/share
+   * Path p2 = Path::global_immutable_data_dir(Path::LOCALPATH_NORMAL); // /usr/share
+   * Path p3 = Path::global_immutable_data_dir(Path::LOCALPATH_LOCAL); // /usr/local/share
+   *
+   * Path::set_global_dir_default(Path::LOCALPATH_NORMAL);
+   * Path p4 = Path::global_immutable_data_dir(); // /usr/share
+   * Path p5 = Path::global_immutable_data_dir(Path::LOCALPATH_LOCAL); // /usr/local/share
+   * Path p6 = Path::global_immutable_data_dir(Path::LOCALPATH_NORMAL); // /usr/share
+   * ~~~~~~~~~~~~~~~~~~~
+   *
+   * As you can see, the argument, if given, always takes precedence over the
+   * default set with set_global_dir_default().
+   */
+  class Path
+  {
+  public:
+
+    /**
+     * Specifies the argument type for the `global_*_dir()` functions.
+     * `LOCALPATH_DEFAULT` means fall back to the default set with `set_global_dir_default()`,
+     * `LOCALPATH_NORMAL` means to use the normal FHS paths, and `LOCALPATH_LOCAL` means to use
+     * the paths the FHS specifies for local additions.
+     */
+    enum localpathtype {
+      LOCALPATH_DEFAULT = 1,
+      LOCALPATH_NORMAL,
+      LOCALPATH_LOCAL
+    };
+
+    /// Default constructor.
+    Path();
+    /// Copy constructor.
+    Path(const Path& path);
+    /// Construct a path from a string.
+    Path(std::string path);
+    /// Construct a path from components.
+    Path(const std::vector<Path>& components);
+
+#if defined(_PATHIE_UNIX)
+    static inline Path from_native(const std::string& native_filename) // UNIX variant; see the description on the Windows overload below.
+      { return Path(filename_to_utf8(native_filename)); }
+#elif defined(_WIN32)
+    /** Convert a path that is in the native representation of
+     * the system into a Path instance. The argument will be
+     * transcoded from the system’s native encoding to UTF-8;
+     * on Windows, the argument is expected to be UTF-16LE therefore,
+     * while on UNIX, it is expected to be encoded in the environment’s
+     * locale. */
+    static inline Path from_native(const std::wstring& native_filename)
+      { return Path(utf16_to_utf8(native_filename)); }
+#else
+#error Unsupported system.
+#endif
+
+    /// Returns the current working directory.
+    static Path pwd();
+    /// Returns the path to the running executable.
+    static Path exe();
+    /// Returns the home directory.
+    static Path home();
+
+    static Path data_dir();        ///< Directory for permanent user data
+    static Path config_dir();      ///< Directory for permanent user configuration files
+    static Path cache_dir();       ///< Directory for cached user data
+    static Path runtime_dir();     ///< Directory for volatile information
+    static Path temp_dir();        ///< Directory for temporary data
+    static Path desktop_dir();     ///< User’s desktop directory
+    static Path documents_dir();   ///< User’s documents directory
+    static Path download_dir();    ///< User’s download directory
+    static Path music_dir();       ///< User’s music directory
+    static Path pictures_dir();    ///< User’s pictures directory
+    static Path publicshare_dir(); ///< User’s networking directory
+    static Path templates_dir();   ///< User’s document templates directory
+    static Path videos_dir();      ///< User’s video directory
+    static Path appentries_dir();  ///< User’s application starters directory
+
+    static Path global_mutable_data_dir(localpathtype local = LOCALPATH_DEFAULT);   ///< Global directory for mutable permanent data
+    static Path global_immutable_data_dir(localpathtype local = LOCALPATH_DEFAULT); ///< Global directory for immutable permanent data
+    static Path global_config_dir(localpathtype local = LOCALPATH_DEFAULT);         ///< Global directory for configuration files
+    static Path global_cache_dir(localpathtype local = LOCALPATH_DEFAULT);          ///< Global directory for cached data
+    static Path global_runtime_dir(localpathtype local = LOCALPATH_DEFAULT);        ///< Global directory for volatile information
+    static Path global_appentries_dir(localpathtype local = LOCALPATH_DEFAULT);     ///< Global application starters directory
+    static Path global_programs_dir();                        ///< Global directory for selfcontained programs
+
+    static Path mktmpdir(const std::string& name = "tmpd"); ///< Create a temporary directory
+
+    static inline void set_global_dir_default(localpathtype localdefault){ c_localdefault = localdefault; } ///< Specify what to do for the `global_*_dir()` methods if no argument is passed to them.
+    static inline localpathtype get_global_dir_default(){ return c_localdefault; } ///< Returns what was set with set_global_dir_default().
+
+#ifdef _PATHIE_UNIX
+    static std::vector<Path> data_dirs();
+    static std::vector<Path> config_dirs();
+#endif
+
+    /// Shell-like glob.
+    static std::vector<Path> glob(const std::string& pattern, int flags = 0);
+    /// Traverse directory recursively, passing entries to the callback `cb`.
+    void find(bool (*cb)(const Path& entry)) const;
+
+    /// Return the path as a raw std::string.
+    std::string str() const;
+    /// Alias for str().
+    std::string utf8_str() const;
+    /// Assign the given string to the underlying path.
+    void assign(std::string str);
+
+#if defined(_PATHIE_UNIX)
+    std::string native() const; // UNIX variant: native format is a std::string.
+#elif defined(_WIN32)
+    /// Return the path in the native format.
+    std::wstring native() const;
+#else
+#error Unsupported system.
+#endif
+
+    void swap(Path& path) throw();
+
+    /// Number of components in the path string.
+    size_t component_count() const;
+    /// Burst path into components.
+    std::vector<Path> burst(bool descend = false) const;
+    /// Shell-like globbing.
+    std::vector<Path> dglob(const std::string& pattern, int flags = 0) const;
+    /// Glob pattern check without filesystem access.
+    bool fnmatch(const std::string& pattern, int flags = 0) const;
+
+    Path& operator=(const Path& path);
+    Path& operator=(const std::string& str);
+    /// Access single component in the path.
+    Path operator[](size_t index) const;
+    bool operator==(const Path& path) const;
+    bool operator!=(const Path& path) const;
+    bool operator<(const Path& path) const;
+    bool operator>(const Path& path) const;
+    bool operator<=(const Path& path) const;
+    bool operator>=(const Path& path) const;
+
+    Path operator/(Path path) const;
+    Path operator/(std::string str) const;
+    Path& operator/=(Path path);
+    Path& operator/=(std::string str);
+    Path join(Path path) const;
+    Path join(std::string path) const;
+    Path sub_ext(std::string new_extension) const;
+
+    /// Platform-independent C fopen().
+    FILE* fopen(const char* mode) const;
+    /// Update modification and access time to now.
+    void touch() const;
+
+    bool is_absolute() const; ///< Checks if a path is absolute.
+    bool is_relative() const; ///< Checks if a path is relative.
+    bool is_root() const;     ///< Checks if a path is the file system root.
+
+    /// Remove all . and .. occurrences.
+    Path prune() const;
+    /// Creates an absolute path for this path.
+    Path absolute(const Path& base = Path::pwd()) const;
+    /// Creates a relative path from an absolute one.
+    Path relative(Path base) const;
+    /// Expands all shortcuts plus create an absolute path for this path.
+    Path expand() const;
+    /// Get the one real path for this path.
+    Path real() const;
+
+    Path parent() const;
+    Path root() const;
+    Path basename() const;
+    Path dirname() const;
+    std::string extension() const;
+    void split(Path& dirname, Path& basename) const;
+
+    /// C stat information.
+#if defined(_PATHIE_UNIX)
+    struct stat* stat() const;
+#elif defined(_WIN32)
+    struct _stat* stat() const;
+#else
+#error Unsupported system.
+#endif
+
+    /// File size.
+    long size() const;
+    time_t atime() const;
+    time_t mtime() const;
+    time_t ctime() const;
+
+    /// List of entries.
+    std::vector<Path> entries() const;
+    /// List of children.
+    std::vector<Path> children() const;
+
+    bool exists() const;
+    bool is_directory() const;
+    bool is_file() const;
+    bool is_symlink() const;
+
+    Path readlink() const;
+    /// Create a symbolic link.
+    void make_symlink(const Path& target) const;
+    void mkdir() const;
+    void rmdir() const;
+    void unlink() const;
+    void remove() const;
+    /// "mkdir -p"-like functionality.
+    void mktree() const;
+    /// "rm -r"-like functionality.
+    void rmtree() const;
+    /// Change file names.
+    void rename(Path& newname) const;
+
+    entry_iterator begin_entries() const;
+    entry_iterator end_entries() const;
+
+  private:
+    static std::string make_tempname(const std::string& namepart);
+    // Remove double // and trailing /, replace \ with /.
+    void sanitize();
+
+#if defined(_PATHIE_UNIX)
+    static Path get_xdg_dir(const std::string& envvarname, const std::string& defaultpath);
+    static std::vector<Path> get_xdg_dirlist(const std::string& envvarname, const std::string& defaultlist);
+    static std::string get_xdg_userdir_setting(const std::string& setting);
+    static std::string get_home(std::string username);
+#elif defined(_WIN32)
+    bool is_ntfs_symlink(const wchar_t* path) const;
+    wchar_t* read_ntfs_symlink(const wchar_t* path) const;
+#endif
+
+    static localpathtype c_localdefault; ///< Written by set_global_dir_default() and read by get_global_dir_default().
+    std::string m_path; ///< The underlying path string (presumably what str() returns; confirm in the implementation).
+  };
+
+}
+
+/// Stream insertion operator for Path (std::cout compatibility); declared in the global namespace, outside namespace Pathie.
+std::ostream& operator<<(std::ostream& stream, const Pathie::Path& p);
+
+#endif
diff --git a/src/3rd_party/pathie-cpp/include/pathie.hpp b/src/3rd_party/pathie-cpp/include/pathie.hpp
new file mode 100644
index 00000000..6afbf5b0
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/include/pathie.hpp
@@ -0,0 +1,67 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PATHIE_PATHIE_HPP
+#define PATHIE_PATHIE_HPP
+#if __cplusplus < 199711L
+#error Pathie requires C++98 support. Please use an option such as -std=c++98 to enable it.
+#endif
+
+#if !defined(_PATHIE_UNIX) && (defined(unix) || defined(__unix__) || defined(__unix) || defined(__APPLE__) || defined(BSD))
+#define _PATHIE_UNIX
+#endif
+
+#include <string>
+
+/// Namespace for this library.
+namespace Pathie {
+
+  /// Returns the version number as MAJOR.MINOR.TINY.
+  std::string version();
+
+  /**
+   * Returns the Git commit this was built from.
+   * Empty string if built without Git.
+   */
+  std::string gitrevision();
+
+#ifdef _WIN32
+  std::string utf16_to_utf8(std::wstring); // Windows only; path.hpp's from_native() uses this to turn a UTF-16 native path into UTF-8.
+  std::wstring utf8_to_utf16(std::string); // Windows only; presumably the reverse conversion -- confirm in pathie.cpp.
+#endif
+
+#ifdef _PATHIE_UNIX
+  std::string utf8_to_filename(const std::string& utf8); // UNIX only; presumably the reverse of filename_to_utf8() -- confirm in pathie.cpp.
+  std::string filename_to_utf8(const std::string& native_filename); // UNIX only; path.hpp's from_native() uses this to turn a native filename into UTF-8.
+  std::string convert_encodings(const char* from_encoding, const char* to_encoding, const std::string& string); // UNIX only; presumably re-encodes `string` between the two named encodings -- confirm in pathie.cpp.
+#endif
+
+}
+
+#endif
diff --git a/src/3rd_party/pathie-cpp/include/pathie_ifstream.hpp b/src/3rd_party/pathie-cpp/include/pathie_ifstream.hpp
new file mode 100644
index 00000000..c5736b37
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/include/pathie_ifstream.hpp
@@ -0,0 +1,111 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PATHIE_IFSTREAM_HPP
+#define PATHIE_IFSTREAM_HPP
+#include <fstream>
+
+#if defined(_WIN32) && defined(__GNUC__)
+#include <ext/stdio_filebuf.h>
+#endif
+
+#include "path.hpp"
+
+namespace Pathie {
+
+#if defined(_PATHIE_UNIX)
+  class ifstream: public std::ifstream { // UNIX variant (selected by _PATHIE_UNIX): derives directly from std::ifstream.
+  public:
+    ifstream();
+    ifstream(char* path, std::ios_base::openmode = std::ios_base::in); // NOTE(review): takes a non-const char*, unlike open(const char*) below.
+    ifstream(std::string path, std::ios_base::openmode = std::ios_base::in);
+    ifstream(Pathie::Path path, std::ios_base::openmode = std::ios_base::in);
+
+    void open(const char* filename, ios_base::openmode mode = ios_base::in); // The open() overloads declared here hide std::ifstream::open().
+    void open(const std::string& filename, ios_base::openmode mode = ios_base::in);
+    void open(const Pathie::Path& filename, ios_base::openmode mode = ios_base::in);
+  };
+
+#elif defined (_WIN32)
+#  if defined(__GNUC__)
+  /**
+   * \brief Input stream for UTF-8-encoded filenames.
+   *
+   * This class implements an interface like `std::ifstream` that works
+   * with Unicode paths regardless of the platform. Please refer to
+   * the documentation of Pathie::ofstream for more information on
+   * rationale and usage; this class works the same way as Pathie::ofstream,
+   * just for input rather than output file streams. This is the variant
+   * used on Windows with GCC; it derives from std::basic_istream rather
+   * than std::ifstream and uses a GNU stdio_filebuf as stream buffer.
+   */
+  class ifstream: public std::basic_istream<char, std::char_traits<char> >
+  {
+  public:
+    typedef char char_type;                          ///< Type used inside the stream.
+    typedef std::char_traits<char> traits_type;      ///< Traits type
+    typedef typename traits_type::int_type int_type; ///< Int type
+    typedef typename traits_type::pos_type pos_type; ///< pos type
+    typedef typename traits_type::off_type off_type; ///< offset type
+
+    ifstream();
+    explicit ifstream(const char* filename, ios_base::openmode mode = ios_base::in);
+    explicit ifstream(const std::string& filename, ios_base::openmode mode = ios_base::in);
+    explicit ifstream(const Pathie::Path& filename, ios_base::openmode mode = ios_base::in);
+    ~ifstream();
+
+    __gnu_cxx::stdio_filebuf<char>* rdbuf() const; // Returns the GNU stdio_filebuf type instead of std::filebuf.
+    bool is_open() const; // C++11 mandates const this, C++98 did not have that
+    void open(const char* filename, ios_base::openmode mode = ios_base::in);
+    void open(const std::string& filename, ios_base::openmode mode = ios_base::in);
+    void open(const Pathie::Path& filename, ios_base::openmode mode = ios_base::in);
+    void close();
+
+  private:
+    FILE* mp_file; // C stdio handle; presumably the file wrapped by mp_filebuffer -- confirm in the implementation.
+    __gnu_cxx::stdio_filebuf<char>* mp_filebuffer; // Stream buffer, of the type returned by rdbuf().
+    bool m_buffer_allocated; // Presumably records whether mp_filebuffer currently needs to be freed -- confirm in the implementation.
+  };
+
+#  elif defined(_MSC_VER)
+    class ifstream: public std::ifstream { // Windows/MSVC variant: derives directly from std::ifstream.
+    public:
+      ifstream();
+      ifstream(char* path, std::ios_base::openmode = std::ios_base::in); // NOTE(review): takes a non-const char*.
+      ifstream(std::string path, std::ios_base::openmode = std::ios_base::in);
+      ifstream(Pathie::Path path, std::ios_base::openmode = std::ios_base::in);
+    }; // No open() overloads are declared in this variant, so open() calls resolve to std::ifstream::open().
+
+#  else
+#    error Unsupported compiler: do not know how to open C++ stream on Unicode file.
+#  endif
+#else
+#  error Unsupported system.
+#endif
+
+}
+#endif
diff --git a/src/3rd_party/pathie-cpp/include/pathie_ofstream.hpp b/src/3rd_party/pathie-cpp/include/pathie_ofstream.hpp
new file mode 100644
index 00000000..1ff43e6c
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/include/pathie_ofstream.hpp
@@ -0,0 +1,192 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PATHIE_OFSTREAM_HPP
+#define PATHIE_OFSTREAM_HPP
+
+#if defined(_WIN32) && defined(__GNUC__)
+#include <ostream>
+#include <ext/stdio_filebuf.h>
+#else
+#include <fstream>
+#endif
+
+#include "path.hpp"
+
+namespace Pathie {
+
+#if defined(_PATHIE_UNIX)
+  class ofstream: public std::ofstream {
+  public:
+    ofstream();
+    ofstream(char* path, std::ios_base::openmode = std::ios_base::out);
+    ofstream(std::string path, std::ios_base::openmode = std::ios_base::out);
+    ofstream(Pathie::Path path, std::ios_base::openmode = std::ios_base::out);
+
+    void open(const char* filename, ios_base::openmode mode = ios_base::out | ios_base::trunc);
+    void open(const std::string& filename, ios_base::openmode mode = ios_base::out | ios_base::trunc);
+    void open(const Pathie::Path& filename, ios_base::openmode mode = ios_base::out | ios_base::trunc);
+  };
+#elif defined (_WIN32)
+#  if defined(__GNUC__)
+  /**
+   * \brief Output stream for UTF-8-encoded filenames.
+   *
+   * Unicode filenames with C++ are horrible, and this is why the Pathie library
+   * was written in the first place. However, working with paths may be nice,
+   * but what does this mean for you if you cannot actually open the file
+   * whose path you have been manipulating? On UNIX, the `std::ofstream` class
+   * will work just as expected if you pass it a UTF-8 unicode filename and it
+   * will open exactly the path you specified. Windows however uses UTF-16LE
+   * as the encoding for pathnames, and the same code that runs on UNIX will
+   * produce garbage filenames on Windows. Take this as an example:
+   *
+   * ~~~~~~~~~~~~~~~~~ c++
+   * std::ofstream file("Bärenstark.txt");
+   * file << "Some content" << std::endl;
+   * file.close();
+   * ~~~~~~~~~~~~~~~~~
+   *
+   * The file will appear as expected on UNIX, but on Windows it will have
+   * a garbage filename because Windows interprets filenames based on the
+   * `char` type as in the local encoding (Windows-1252 on a Western European
+   * Windows system). You have to use filenames based on `wchar_t` on Windows
+   * to get the desired effect. This, however, doesn’t work either:
+   *
+   * ~~~~~~~~~~~~~~~~~ c++
+   * std::ofstream file(L"Bärenstark.txt");
+   * file << "Some content" << std::endl;
+   * file.close();
+   * ~~~~~~~~~~~~~~~~~
+   *
+   * That is, it works only on the Microsoft Visual C++ Compiler (MSVC). The reason
+   * for this is that the ISO C++ standard does not specify a constructor
+   * that takes filenames based on `wchar_t`, but only on `char`, which Windows
+   * interprets as described above. That’s a nice proof of how Windows tries
+   * to be inherently different from all other modern OSes in this world, and
+   * how it makes simple tasks a pain if you want cross-platform behaviour.
+   * GCC on Windows, as distributed by the MinGW project, does not support the
+   * nonstandard constructor. As it stands, you **cannot** create Unicode files
+   * via the standard C++ interface with MinGW GCC. There is, however, a special
+   * function in the Windows API called `_wfopen()` that lets you at least open
+   * a file via a `fopen()`-like C API. Thankfully GCC provides a (also nonstandard)
+   * measure to create a filebuffer (this is what is used by the C++ streams
+   * under the hood to access the files) from a C `FILE*`. This class wraps
+   * that GNU C++ extension (`__gnu_cxx::stdio_filebuf`) on Windows, as well as it wraps
+   * the standard stream API on other platforms. It therefore unites the different
+   * access methods under a single uniform interface that allows you to
+   * create Unicode filenames regardless of the platform you run on.
+   *
+   * Let’s revisit the previous example, now with Pathie’s streams:
+   *
+   * ~~~~~~~~~~~~~~~~~ c++
+   * Pathie::ofstream file("Bärenstark.txt");
+   * file << "Some content" << std::endl;
+   * file.close();
+   * ~~~~~~~~~~~~~~~~~
+   *
+   * The `Pathie::ofstream` constructor takes a UTF-8 string and does the
+   * necessary conversion to UTF-16, uses `_wfopen()` under the hood to access
+   * the file, and then wraps a C++ stream around the already opened file
+   * descriptor. On platforms other than MinGW Windows, the `Pathie::ofstream` class
+   * will just delegate to the standard `std::ofstream` class. As a bonus,
+   * if you compile with MSVC the nonstandard constructor described above
+   * is used.
+   *
+   * Of course, there’s also a constructor that will make it work directly
+   * with instances of Pathie::Path:
+   *
+   * ~~~~~~~~~~~~~~~~~ c++
+   * Pathie::Path p("Bärenstark.txt");
+   * Pathie::ofstream file(p);
+   * file << "Some content" << std::endl;
+   * file.close();
+   * ~~~~~~~~~~~~~~~~~
+   *
+   * That is, you can stay with UTF-8 `char`-based strings (like `std::string`)
+   * for anything you use. Ain’t that great?
+   *
+   * \warning On Windows, this class tries to behave as much like the standard
+   * `std::ofstream` as possible. Due to the file descriptor magic it does under
+   * the hood, however, there is a little difference: If you construct an
+   * instance of this class without associating it immediately with a filename
+   * (the constructor without arguments), using any methods apart from `is_open()`
+   * (which is specifically implemented for that purpose) that use the underlying
+   * filebuffer will result in segmentation faults, because the filebuffer has
+   * not yet been constructed (the area where it will be constructed into is
+   * full of NUL bytes if you wonder).
+   *
+   * \note Please refer to your preferred C++ STL documentation for the
+   * `std::ofstream` class for general usage of C++ file streams.
+   */
+  class ofstream: public std::basic_ostream<char, std::char_traits<char> >
+  {
+  public:
+    typedef char char_type;                          ///< Type used inside the stream.
+    typedef std::char_traits<char> traits_type;      ///< Traits type
+    typedef typename traits_type::int_type int_type; ///< Int type
+    typedef typename traits_type::pos_type pos_type; ///< pos type
+    typedef typename traits_type::off_type off_type; ///< offset type
+
+    ofstream();
+    explicit ofstream(const char* filename, ios_base::openmode mode = ios_base::out|ios_base::trunc);
+    explicit ofstream(const std::string& filename, ios_base::openmode mode = ios_base::out|ios_base::trunc);
+    explicit ofstream(const Pathie::Path& filename, ios_base::openmode mode = ios_base::out|ios_base::trunc);
+    ~ofstream();
+
+    __gnu_cxx::stdio_filebuf<char>* rdbuf() const;
+    bool is_open() const; // C++11 mandates const this, C++98 hadn’t that
+    void open(const char* filename, ios_base::openmode mode = ios_base::out | ios_base::trunc);
+    void open(const std::string& filename, ios_base::openmode mode = ios_base::out | ios_base::trunc);
+    void open(const Pathie::Path& filename, ios_base::openmode mode = ios_base::out | ios_base::trunc);
+    void close();
+
+  private:
+    FILE* mp_file;
+    __gnu_cxx::stdio_filebuf<char>* mp_filebuffer;
+    bool m_buffer_allocated;
+  };
+
+#  elif defined(_MSC_VER)
+    class ofstream: public std::ofstream {
+    public:
+      ofstream();
+      ofstream(char* path, std::ios_base::openmode = std::ios_base::out);
+      ofstream(std::string path, std::ios_base::openmode = std::ios_base::out);
+      ofstream(Pathie::Path path, std::ios_base::openmode = std::ios_base::out);
+    };
+#  else
+#    error Unsupported compiler: do not know how to open C++ stream on Unicode file.
+#  endif
+#else
+#  error Unsupported system.
+#endif
+
+}
+#endif
diff --git a/src/3rd_party/pathie-cpp/include/temp.hpp b/src/3rd_party/pathie-cpp/include/temp.hpp
new file mode 100644
index 00000000..02a35879
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/include/temp.hpp
@@ -0,0 +1,83 @@
+#ifndef PATHIE_TEMPDIR_HPP
+#define PATHIE_TEMPDIR_HPP
+#include "path.hpp"
+
+namespace Pathie {
+
+  /**
+   * A class for working with temporary entries; this is the
+   * superclass of Tempdir and Tempfile that encapsulates the common
+   * logic between the two. This class cannot be instantiated
+   * directly, instead use Tempdir and Tempfile.
+   *
+   * This class relies on `rand()` when generating the temporary
+   * path name.  Therefore, it is recommended to initialise the
+   * random number generator before creating instances of this class
+   * by calling the `srand()` function.
+   *
+   * In a multithreaded environment, this class generates conflicting
+   * directory names if the C random number generator is in the same state
+   * in two threads and an instance of Tempdir is constructed in these two
+   * threads in the very same second. You should not use an instance of
+   * this class in multiple threads.
+   */
+  class TempEntry
+  {
+  public:
+    TempEntry(std::string namepart);
+    virtual ~TempEntry();
+
+    virtual void remove() const = 0;
+    void keep(bool k = true);
+
+    Path path() const;
+    bool is_kept() const;
+  protected:
+    bool m_keep;
+    Path m_path;
+  };
+
+  /**
+   * Class for working with temporary directories. Creating
+   * an instance of this class creates a temporary directory,
+   * which is removed again when the object is destroyed.
+   * If you want to keep the directory for whatever reason,
+   * call TempEntry::keep().
+   *
+   * Call TempEntry::path() to retrieve the path of the
+   * generated directory.
+   *
+   * See the docs for the TempEntry class for information
+   * on how the temporary names are generated.
+   */
+  class Tempdir: public TempEntry
+  {
+  public:
+    Tempdir(std::string namepart);
+    virtual ~Tempdir();
+    virtual void remove() const;
+  };
+
+  /**
+   * Class for working with temporary files. Creating
+   * an instance of this class creates a temporary file,
+   * which is removed again when the object is destroyed.
+   * If you want to keep the file for whatever reason,
+   * call TempEntry::keep().
+   *
+   * Call TempEntry::path() to retrieve the path of the
+   * generated file.
+   *
+   * See the docs for the TempEntry class for information
+   * on how the temporary names are generated.
+   */
+  class Tempfile: public TempEntry
+  {
+  public:
+    Tempfile(std::string namepart);
+    virtual ~Tempfile();
+    virtual void remove() const;
+  };
+}
+
+#endif /* PATHIE_TEMPDIR_HPP */
diff --git a/src/3rd_party/pathie-cpp/src/entry_iterator.cpp b/src/3rd_party/pathie-cpp/src/entry_iterator.cpp
new file mode 100644
index 00000000..e2ecb2fe
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/src/entry_iterator.cpp
@@ -0,0 +1,279 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../include/entry_iterator.hpp"
+#include "../include/path.hpp"
+#include "../include/errors.hpp"
+
+#if defined(__unix__)
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdexcept>
+#elif defined(_WIN32)
+#include <Windows.h>
+#else
+#error Unsupported system
+#endif
+
+using namespace Pathie;
+
+/**
+ * The default constructor always constructs the terminal
+ * iterator, i.e. the one you want to test for if you want
+ * to know whether an iteration has completed.
+ */
+entry_iterator::entry_iterator()
+  : mp_directory(NULL),
+    mp_cur(NULL),
+    mp_cur_path(new Path())
+{
+}
+
+/**
+ * Construct an iterator that reads the entries in the given directory.
+ */
+entry_iterator::entry_iterator(const Path* p_directory)
+  : mp_directory(p_directory),
+    mp_cur(NULL),
+    mp_cur_path(new Path())
+{
+  open_native_handle();
+}
+
+/**
+ * Destructor. Closes the open native handle, if it is open.
+ */
+entry_iterator::~entry_iterator()
+{
+  close_native_handle();
+
+  if (mp_cur_path)
+    delete mp_cur_path;
+
+  // `mp_directory' is NOT deleted, because this class does not own it!
+}
+
+/**
+ * Opens the native handle to the directory and reads the first entry
+ * from it; a directory that yields no entry leaves the iterator finished.
+ */
+void entry_iterator::open_native_handle()
+{
+#if defined(_PATHIE_UNIX)
+  std::string nstr = mp_directory->native();
+  mp_cur = opendir(nstr.c_str());
+  if (!mp_cur)
+    throw(Pathie::ErrnoError(errno));
+
+  // readdir() yields NULL at end-of-directory or on error -- do not dereference that.
+  if (struct dirent* p_dirent = readdir(static_cast<DIR*>(mp_cur)))
+    *mp_cur_path = filename_to_utf8(p_dirent->d_name);
+  else
+    close_native_handle();
+#elif defined(_WIN32)
+  std::wstring utf16 = utf8_to_utf16(mp_directory->str() + "/*");
+  WIN32_FIND_DATAW finddata;
+
+  mp_cur = FindFirstFileW(utf16.c_str(), &finddata);
+  if (static_cast<HANDLE>(mp_cur) == INVALID_HANDLE_VALUE) {
+    DWORD err = GetLastError();
+    mp_cur = NULL;
+    throw(Pathie::WindowsError(err));
+  }
+  else {
+    *mp_cur_path = utf16_to_utf8(finddata.cFileName);
+  }
+#else
+#error Unsupported system
+#endif
+}
+
+/// Closes the native handle, if one is open, and resets the current entry; does nothing otherwise.
+void entry_iterator::close_native_handle()
+{
+  if (!mp_cur)
+    return;
+
+#if defined(_PATHIE_UNIX)
+  closedir(static_cast<DIR*>(mp_cur));
+#elif defined(_WIN32)
+  FindClose(static_cast<HANDLE>(mp_cur));
+#endif
+
+  // Reset member variables; a NULL mp_cur is what operator==() treats as "finished"
+  *mp_cur_path = Path();
+  mp_cur = NULL;
+}
+
+/**
+ * Increment operator. Calling this advances the iterator by one,
+ * thus pointing it to the next entry. If the end is reached,
+ * the iterator will compare equal to the return value of the
+ * default constructor, and dereferencing it yields an undefined
+ * result.
+ *
+ * \remark Note that this operator does *not* return the old value
+ * the iterator had, simply because that would mean copying the
+ * receiver first, and copying instances of this class is not
+ * possible. Thus, *do not rely* on the return value of this
+ * method.
+ */
+entry_iterator& entry_iterator::operator++(int)
+{
+  if (mp_cur) {
+#if defined(_PATHIE_UNIX)
+    struct dirent* p_dirent = readdir(static_cast<DIR*>(mp_cur));
+    if (p_dirent) {
+      *mp_cur_path = filename_to_utf8(p_dirent->d_name);
+    }
+    else {
+      close_native_handle();
+    }
+#elif defined(_WIN32)
+    WIN32_FIND_DATAW finddata;
+    if (FindNextFileW(static_cast<HANDLE>(mp_cur), &finddata)) {
+      *mp_cur_path = utf16_to_utf8(finddata.cFileName);
+    }
+    else {
+      close_native_handle();
+    }
+#else
+#error Unsupported system
+#endif
+  }
+  else { // Finished already
+    throw(std::range_error("Tried to advance a finished entry_iterator!"));
+  }
+
+  return *this;
+}
+
+/// Prefix form; forwards to operator++(int), which advances the iterator in place.
+entry_iterator& entry_iterator::operator++()
+{
+  return (operator++(0)); // the dummy int selects the other overload instead of recursing forever
+}
+
+/**
+ * Dereference operator. Returns the entry the iterator currently
+ * points at.
+ */
+const Path& entry_iterator::operator*() const
+{
+  return *mp_cur_path;
+}
+
+/**
+ * Resets this iterator to start again on the path given.
+ */
+entry_iterator& entry_iterator::operator=(const Path* p_directory)
+{
+  close_native_handle();
+  mp_directory = p_directory;
+  open_native_handle();
+  return *this;
+}
+
+/**
+ * Boolean operator. In comparisons, this iterator is true if it has
+ * not yet finished (its native handle is still open), false otherwise.
+ */
+entry_iterator::operator bool() const
+{
+  return !!mp_cur; // mp_directory stays set after the last entry, mp_cur does not
+}
+
+/**
+ * Equality test. Two instances of this class are equal if:
+ *
+ * 1. If `other` is a terminal iterator as created by the parameterless
+ *    constructor: if the receiver has finished iterating the directory.
+ * 2. If `other` is not a terminal iterator as described: if both
+ *    iterators refer to the same top directory and their current
+ *    native handle is the same and in the same state (hint: this
+ *    is not going to happen under normal circumstances).
+ */
+bool entry_iterator::operator==(const entry_iterator& other) const
+{
+  if (other.mp_directory == NULL) {
+    /* `mp_directory' is only null for the terminal iterator, that is,
+     * a test for the terminal iterator was requested. An entry_iterator
+     * is terminated when `mp_cur' is null, so that's what is returned
+     * in reality when a test with the terminal iterator is
+     * requested. */
+    return !mp_cur;
+  }
+  else {
+    return mp_directory == other.mp_directory && mp_cur == other.mp_cur;
+  }
+}
+
+/// Inverse of operator==().
+bool entry_iterator::operator!=(const entry_iterator& other) const
+{
+  return !(*this == other);
+}
+
+/**
+ * Member access operator. Returns a pointer to the entry the iterator
+ * currently points at.
+ */
+const Path* entry_iterator::operator->() const
+{
+  return mp_cur_path;
+}
+
+/// "Copy" constructor -- see class docs for more info. Takes over other's state instead of duplicating it.
+entry_iterator::entry_iterator(const entry_iterator& other)
+  : mp_directory(other.mp_directory),
+    mp_cur(other.mp_cur),
+    mp_cur_path(other.mp_cur_path)
+{
+  entry_iterator& e = const_cast<entry_iterator&>(other); // other is modified although it is passed as const
+  e.mp_directory    = NULL;
+  e.mp_cur          = NULL;                               // the native handle is now referenced by *this only
+  e.mp_cur_path     = new Path();                         // other gets a fresh Path of its own to delete later
+}
+
+/// "Copy" assignment -- see class docs for more info.
+entry_iterator& entry_iterator::operator=(const entry_iterator& other)
+{
+  if (this == &other) return *this; // self-assignment: nothing to transfer
+  close_native_handle();            // release the handle the receiver already holds
+  delete mp_cur_path;               // ... and its Path, which is replaced below
+  mp_directory      = other.mp_directory;
+  mp_cur            = other.mp_cur;
+  mp_cur_path       = other.mp_cur_path;
+  entry_iterator& e = const_cast<entry_iterator&>(other);
+  e.mp_directory    = NULL;
+  e.mp_cur          = NULL;
+  e.mp_cur_path     = new Path();
+  return *this;
+}
diff --git a/src/3rd_party/pathie-cpp/src/errors.cpp b/src/3rd_party/pathie-cpp/src/errors.cpp
new file mode 100644
index 00000000..f5e406b1
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/src/errors.cpp
@@ -0,0 +1,150 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../include/errors.hpp"
+
+#include <cerrno>
+#include <cstring>
+#include <sstream>
+
+#if defined(_WIN32)
+#include <windows.h>
+#elif defined(_PATHIE_UNIX)
+#include <glob.h>
+#endif
+
+using namespace Pathie;
+
+PathieError::PathieError()
+{
+  m_pathie_errmsg = "Unknown pathie exception.";
+}
+
+PathieError::PathieError(std::string message)
+{
+  m_pathie_errmsg = message;
+}
+
+PathieError::~PathieError() throw()
+{
+  //
+}
+
+const char* PathieError::what() const throw()
+{
+  return m_pathie_errmsg.c_str();
+}
+
+ErrnoError::ErrnoError(int val)
+{
+  std::stringstream ss;
+  ss << val;
+
+  m_val = val;
+  m_pathie_errmsg = "Errno " + ss.str() + ": " + strerror(val);
+}
+
+ErrnoError::~ErrnoError() throw()
+{
+  //
+}
+
+#ifdef _WIN32
+WindowsError::WindowsError(DWORD val)
+{
+  std::stringstream ss;
+  ss << val;
+
+  wchar_t* buf = NULL; // stays NULL if FormatMessageW() cannot produce a text
+  FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+                 NULL,
+                 val,
+                 LANG_USER_DEFAULT,
+                 (wchar_t*) &buf, // With ALLOCATE_BUFFER this receives a pointer to the allocated text.
+                 0,
+                 NULL);
+
+  m_val = val;
+  m_pathie_errmsg = std::string("Windows Error Code ") + ss.str() + ": "
+                  + (buf ? utf16_to_utf8(buf) : std::string("(no description available)"));
+  LocalFree(buf);
+}
+
+WindowsError::~WindowsError() throw()
+{
+  //
+}
+
+WindowsHresultError::WindowsHresultError(HRESULT val)
+{
+  std::stringstream ss;
+  ss << val;
+
+  m_val = val;
+  m_pathie_errmsg = std::string("Windows HRESULT Error Code :") + ss.str();
+}
+
+WindowsHresultError::~WindowsHresultError() throw()
+{
+  //
+}
+
+#endif
+
+#ifdef _PATHIE_UNIX
+GlobError::GlobError(int val)
+{
+  std::stringstream ss;
+  ss << val;
+
+  m_val = val;
+
+  m_pathie_errmsg = "Glob error code " + ss.str() + ": ";
+
+  switch(val) {
+  case GLOB_NOSPACE:
+    m_pathie_errmsg += "GLOB_NOSPACE";
+    break;
+  case GLOB_ABORTED:
+    m_pathie_errmsg += "GLOB_ABORTED";
+    break;
+  case GLOB_NOMATCH:
+    m_pathie_errmsg += "GLOB_NOMATCH";
+    break;
+  default:
+    m_pathie_errmsg += "Unknown glob error";
+    break;
+  }
+}
+
+GlobError::~GlobError() throw()
+{
+  //
+}
+#endif
diff --git a/src/3rd_party/pathie-cpp/src/path.cpp b/src/3rd_party/pathie-cpp/src/path.cpp
new file mode 100644
index 00000000..99185085
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/src/path.cpp
@@ -0,0 +1,3348 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../include/path.hpp"
+#include "../include/pathie.hpp"
+#include "../include/errors.hpp"
+
+#include <cstdlib>
+#include <cstdio>
+#include <ctime>
+#include <cstring>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdexcept>
+#include <errno.h>
+
+#if defined(_WIN32)
+#include <windows.h>
+#include <winioctl.h>
+#include <direct.h>
+#include <shlobj.h>
+#include <shlwapi.h>
+//#include <ntifs.h> // Currently not in msys2
+
+#elif defined(_PATHIE_UNIX)
+#include <unistd.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/param.h> // defines "BSD" macro on BSD systems
+#include <pwd.h>
+#include <glob.h>
+#include <fnmatch.h>
+
+#else
+#error Unsupported system.
+#endif
+
+#ifdef BSD
+#include <sys/time.h>
+#include <sys/sysctl.h>
+#endif
+
+using namespace Pathie;
+using namespace std;
+
+Path::localpathtype Path::c_localdefault = LOCALPATH_LOCAL;
+
+/**
+ * The default constructor. It does **not** create an empty
+ * path, but a path whose value is ".", i.e. the current
+ * working directory as a relative path (see also pwd()).
+ */
+Path::Path()
+{
+  m_path = ".";
+}
+
+/**
+ * Copies contents from path to a new instance.
+ *
+ * \param[in] path The Path instance to copy.
+ */
+Path::Path(const Path& path)
+{
+  m_path = path.m_path;
+}
+
+/**
+ * This constructs a path from a given std::string.
+ *
+ * \param path String to construct from. Must be encoded in UTF-8.
+ *
+ * \returns a new instance of class Path.
+ */
+Path::Path(std::string path)
+{
+  m_path = path;
+  sanitize();
+}
+
+/**
+ * Constructs a Path instance from a list of path components.
+ * This is the inverse of the burst() method.
+ *
+ * \param[in] components List of components to join; an empty list gives ".".
+ *
+ * \returns A new instance.
+ */
+Path::Path(const std::vector<Path>& components)
+{
+  if (components.empty()) { // front() on an empty vector is undefined behaviour
+    m_path = ".";
+    return;
+  }
+  m_path = components.front().m_path;
+  if (components.size() > 1) {
+    // The first component needs a separating slash unless it already ends in
+    // one, i.e. unless it is a root such as "/" or "X:/".
+    if (m_path.empty() || m_path[m_path.length() - 1] != '/')
+      m_path += "/";
+
+    std::vector<Path>::const_iterator iter;
+    for(iter=components.begin()+1; iter != components.end(); iter++) // first element has already been taken care of above
+      m_path += (*iter).m_path + "/";
+
+    m_path = m_path.substr(0, m_path.length()-1); // Trailing slash is unwanted, remove it
+  }
+}
+
+/**
+ * Sanitizes the path. It:
+ *
+ * 1. Replaces any backslashes with forward slashes (read Windows).
+ * 2. Replaces all double forward slashes with single forward slashes
+ * 3. Deletes a trailing slash, if any (the filesystem root keeps its slash).
+ */
+void Path::sanitize()
+{
+  // Replace any backslashes \ with forward slashes /.
+  size_t cur = string::npos;
+  while ((cur = m_path.find("\\")) != string::npos) { // assignment intended
+    m_path.replace(cur, 1, "/");
+  }
+
+  // Replace all double slashes // with a single one
+  cur = string::npos;
+  while ((cur = m_path.find("//")) != string::npos) { // assignment intended
+    m_path.replace(cur, 2, "/");
+  }
+
+  // Remove trailing slash if any (except for the filesystem root)
+  long len = m_path.length();
+#if defined(_PATHIE_UNIX)
+  if (len > 1 && m_path[len - 1] == '/')
+    m_path = m_path.substr(0, len - 1);
+#elif defined(_WIN32)
+  if (len > 1) { // / is root of current drive, "x" is the relative path "./x"
+    // Check if X:/foo/bar
+    if (len > 3 && m_path[len - 1] == '/') { // More than 3 chars cannot be root
+      m_path = m_path.substr(0, len - 1);
+    }
+    else { // Only drive root?
+      if (m_path[1] == ':') {
+        // Here m_path must be a drive root. The colon ":" is not allowed in paths on Windows except as the 2nd char to denote the drive letter
+        if (len == 2) { // Whoa -- "X:" misses leading / for drive root, append it
+          m_path.append("/");
+        }
+        else if (len == 3 && m_path[2] != '/') { // Whoa -- "X:f" misses leading / for root directory, insert it
+          m_path.insert(2, "/");
+        }
+        // else length is 3 with a slash, i.e. "X:/". This is fine and shall not be touched.
+      }
+      else { // not a drive root, delete trailing / if any
+        if (m_path[len - 1] == '/') {
+          m_path = m_path.substr(0, len - 1);
+        }
+      }
+    }
+  }
+#else
+#error Unsupported system
+#endif
+}
+
+/** \name Conversion methods
+ *
+ * Convert a path to other objects.
+ */
+///@{
+
+/**
+ * Returns a copy of the underlying `std::string`. This is always
+ * encoded in UTF-8, regardless of the operating system.
+ *
+ * \see native() utf8_str()
+ */
+std::string Path::str() const
+{
+  return m_path;
+}
+
+/**
+ * This method does the same as str(). It exists to make code using
+ * the UTF-8 variant more readable, because one tends to forget
+ * whether str() returns the native or the UTF-8 variant.
+ *
+ * \see native() str()
+ */
+std::string Path::utf8_str() const
+{
+  return m_path;
+}
+
+#if defined(_PATHIE_UNIX)
+std::string Path::native() const
+{
+  return utf8_to_filename(m_path);
+}
+
+#elif defined(_WIN32)
+/**
+ * Returns the path in the platform’s native format. Note
+ * that this method returns a `std::string` on UNIX,
+ * whereas it returns a `std::wstring` on Windows.
+ *
+ * On Windows, the returned string also uses exclusively backslashes
+ * instead of forward slashes. It is encoded in UTF-16LE.
+ *
+ * On UNIX, the returned string is in the encoding dictated by the locale
+ * ($LANG and $LC_ALL variables).
+ */
+std::wstring Path::native() const
+{
+  std::string dup(m_path);
+
+  size_t pos = 0;
+  while((pos = dup.find("/", pos)) != std::string::npos) { // Single = intended
+    dup.replace(pos, 1, "\\");
+  }
+
+  return utf8_to_utf16(dup);
+}
+#else
+#error Unsupported system.
+#endif
+
+///@}
+
+
+/** \name Path decomposition
+ *
+ * Retrieve the parts of the path you want.
+ */
+///@{
+
+/**
+ * Returns the path’s basename, i.e. the last component
+ * of the path, including the file extension.
+ *
+ * For example, "/foo/bar.txt" has a basename of "bar.txt",
+ * and "/foo/bar" has a basename of "bar".
+ *
+ * \returns a new Path instance with only the basename.
+ *
+ * \see dirname()
+ */
+Path Path::basename() const
+{
+  // ".", ".." and a filesystem root are their own basename.
+  if (m_path == "." || m_path == ".." || is_root())
+    return Path(m_path);
+
+  // Everything after the last separator; without a separator the
+  // whole path is the basename.
+  const size_t last_slash = m_path.rfind("/");
+  if (last_slash == string::npos)
+    return Path(m_path);
+
+  return Path(m_path.substr(last_slash + 1));
+}
+
+/**
+ * Returns the path’s dirname, i.e. all components of the
+ * path except for the basename component (see basename()).
+ *
+ * For example, "/foo/bar/baz.txt" has a dirname of "/foo/bar",
+ * and "/foo/bar/baz" has a dirname of "/foo/bar".
+ *
+ * \returns a new Path instance with only the dirname.
+ *
+ * \see basename() parent()
+ */
+Path Path::dirname() const
+{
+  // "." and ".." both report "." as their directory.
+  if (m_path == "." || m_path == "..")
+    return Path(".");
+
+  // A filesystem root is its own dirname.
+  if (is_root())
+    return Path(m_path);
+
+  const size_t pos = m_path.rfind("/");
+
+  // No separator at all: single relative entry.
+  if (pos == string::npos)
+    return Path(".");
+
+  // "/usr": the only separator is the leading one, so the parent is the root.
+  if (pos == 0)
+    return root();
+
+#ifdef _WIN32
+  // "X:/foo": the last separator is the one directly behind the drive
+  // colon, i.e. at index 2 (index 0 is the letter, index 1 the colon).
+  // The parent is the drive root "X:/", not the bare "X:".
+  if (pos == 2 && m_path[1] == ':')
+    return root();
+#endif
+
+  // regular/path or /regular/path
+  return Path(m_path.substr(0, pos));
+}
+
+/**
+ * This is a convenience method that allows you to retrieve
+ * both the dirname() and the basename() in one call.
+ *
+ * \param[out] dname Receives the dirname() value.
+ * \param[out] bname Receives the basename() value.
+ */
+void Path::split(Path& dname, Path& bname) const
+{
+  // dirname() is computed and stored first, basename() second. The order
+  // is observable if one of the output arguments refers to this very
+  // object, so it is kept exactly as is.
+  dname = dirname();
+  bname = basename();
+}
+
+/**
+ * This method returns the file extension of the path (including
+ * the leading dot), if possible; otherwise it returns an empty
+ * string. Only the last path component is considered, so a dot
+ * inside a directory name ("dir.d/file") never counts as an
+ * extension. Filenames that consist entirely of a "file extension",
+ * i.e. ".txt" or "/foo/.txt", and filenames ending in a dot
+ * return an empty string.
+ */
+std::string Path::extension() const
+{
+  const size_t dot = m_path.rfind(".");
+
+  // No dot anywhere: no extension.
+  if (dot == string::npos)
+    return "";
+
+  // The last dot sits before the last separator, so it belongs to a
+  // directory component ("dir.d/file"), not to the filename.
+  const size_t slash = m_path.rfind("/");
+  if (slash != string::npos && dot < slash)
+    return "";
+
+  // ".foo" and "foo." -- this also covers "." and "..".
+  if (dot == 0 || dot == m_path.length() - 1)
+    return "";
+
+  // "foo/.txt": the filename is nothing but the "extension".
+  if (m_path[dot - 1] == '/')
+    return "";
+
+  return m_path.substr(dot);
+}
+
+/**
+ * This is the same as dirname() and is provided only for convenience.
+ *
+ * \see dirname()
+ */
+Path Path::parent() const
+{
+  // Convenience alias for dirname().
+  Path up = dirname();
+  return up;
+}
+
+/**
+ * Returns the number of components in the path string, or
+ * in different words, counts the slashes and adds one for
+ * the last element, except if the path is just the root
+ * (see is_root()).
+ *
+ * The return value of this method minus one is the last
+ * possible index for operator[].
+ */
+size_t Path::component_count() const
+{
+  // A bare root counts as exactly one component.
+  if (is_root())
+    return 1;
+
+  // Count the separators ...
+  size_t separators = 0;
+  for (size_t i = 0; i < m_path.length(); i++) {
+    if (m_path[i] == '/')
+      separators++;
+  }
+
+  // ... and add one for the element behind the last separator.
+  return separators + 1;
+}
+
+/**
+ * Returns the filesystem root for this path. On UNIX,
+ * this will always return /, but on Windows it will
+ * return X:/ if the referenced path is an absolute path
+ * with drive letter, and / if the referenced path is
+ * a relative path or an absolute path on the current
+ * drive.
+ */
+Path Path::root() const
+{
+#if defined(_PATHIE_UNIX)
+  return Path("/");
+#elif defined(_WIN32)
+  // Check if we have an absolute path with drive, otherwise return the
+  // root for the current drive. On Windows a colon is only allowed at
+  // index 1, where it denotes a preceding drive letter. The length check
+  // keeps the index in range for empty paths.
+  if (m_path.length() > 1 && m_path[1] == ':')
+    return Path(m_path.substr(0, 3));
+  else
+    return Path("/");
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * This method splits up the paths into its separate components,
+ * i.e. it splits it up at every /, except for the leading / of
+ * an absolute path, which is considered a component on its own
+ * and is thus the first element of a bursted absolute path.
+ *
+ * \param descend (`false`) If this is true, keeps the parent paths when bursting.
+ *
+ * \returns A vector of Path instances, where each instance
+ * corresponds to one component of the Path.
+ *
+ * Example:
+ *
+ * ~~~~~~~~~~~~~~~~~~~~ c++
+ * Path p("/tmp/foo/bar");
+ * p.burst(); // => /, tmp, foo, bar
+ * p.burst(true); // => /, /tmp, /tmp/foo, /tmp/foo/bar
+ * ~~~~~~~~~~~~~~~~~~~~
+ */
+std::vector<Path> Path::burst(bool descend /* = false */) const
+{
+  std::vector<Path> parts;
+  std::string accumulated; // Parent prefix, only used when descending
+  size_t start = 0;        // Index where the current component begins
+
+  // The leading / of an absolute path is a component of its own.
+  if (m_path[0] == '/') {
+    parts.push_back(Path("/"));
+    accumulated.append("/");
+    start = 1; // Do not find the initial / again
+  }
+
+  // Cut off one component per separator.
+  for (size_t slash = m_path.find("/", start);
+       slash != string::npos;
+       slash = m_path.find("/", start)) {
+    const std::string piece = m_path.substr(start, slash - start);
+
+    if (descend) {
+      parts.push_back(Path(accumulated + piece));
+      accumulated.append(piece);
+      accumulated.append("/");
+    }
+    else {
+      parts.push_back(Path(piece));
+    }
+
+    start = slash + 1;
+  }
+
+  // Whatever follows the last separator is the final component
+  // (note: no trailing / is added when descending).
+  const std::string tail = m_path.substr(start);
+  if (descend)
+    parts.push_back(Path(accumulated + tail));
+  else
+    parts.push_back(Path(tail));
+
+  return parts;
+}
+
+///@}
+
+/** \name Path expansion
+ *
+ * Expand paths to a fuller version without shortcuts.
+ */
+
+///@{
+
+/**
+ * This method removes all occurrences of . and .. from the path,
+ * leaving a clean filesystem path.
+ *
+ * Note that neither an absolute path is created, nor
+ * are shortcuts other than . and .. expanded.
+ *
+ * This method does not access the filesystem, and thus does not
+ * know about symbolic links. Therefore, if the path contains symlinks,
+ * the result may not be the way you expect it. Use real() if
+ * you need to resolve all your symbolic links in the path.
+ *
+ * For example, if you have a directory `/tmp/foo`, which contains a
+ * symbolic link `bar` that points to `/tmp/bar`, then a path of
+ * `/tmp/foo/bar/..` will be prune()d to `/tmp/foo`, although the
+ * canonically correct result is `/tmp`. The latter is what you will
+ * get if you use real().
+ *
+ * An absolute path never turns into a relative one: if all components
+ * are consumed (as in `/foo/..`), the result is the root.
+ *
+ * \returns A new Path with . and .. removed.
+ *
+ * \see expand() real()
+ */
+Path Path::prune() const
+{
+  std::string newpath(m_path); // copy
+  size_t pos = 0;
+  while((pos = newpath.find("/.", pos)) != string::npos) { // assignment intended
+    if (newpath.substr(pos, 3) == "/..") {
+
+      // Weird path like /..foo or foo/..bar, which are NOT relative paths
+      if (newpath.length() > pos + 3 && newpath[pos + 3] != '/') {
+        // Do not reset `pos' -- this has to stay. Advance to the next char.
+        pos++;
+        continue;
+      }
+
+      if (pos == 0) {
+        // /.. at beginning of string, replace with root / (/ on Windows is root on current drive)
+        newpath.erase(pos, 3);
+
+        // Whoops -- the entire string was just "/.."
+        if (newpath.empty()) {
+          newpath.append("/");
+        }
+      }
+#ifdef _WIN32
+      // Cater for paths with drive X:/ on Windows.
+      // ":" is on Windows only allowed at pos 1, where it signifies the
+      // preceding char is a drive letter.
+      else if (pos == 2 && newpath[1] == ':') {
+        // "X:/.." or "X:/../rest": the parent of a drive root is the drive
+        // root itself, so just drop the "/..". The "X:/..foo" case has
+        // already been skipped above. Leaving the string untouched here
+        // would make the loop spin forever, because `pos' is reset below.
+        // A remaining bare "X:" gets its slash back after the loop.
+        newpath.erase(pos, 3);
+      }
+#endif
+      else {
+        size_t pos2 = 0;
+        if ((pos2 = newpath.rfind("/", pos - 1)) != string::npos) { // assignment intended
+          // Remove parent directory.
+          newpath.erase(pos2, pos - pos2 + 3);
+        }
+        else { // ../ for relative path (as in foo/../baz.txt)
+          newpath.erase(0, pos + 4);
+        }
+      }
+    }
+    else { // Single /.
+
+      // Weird path like /.foo or foo/.bar, where the dot starts a name
+      if (newpath.length() > pos + 2 && newpath[pos + 2] != '/') {
+        // Do not reset `pos' -- this has to stay. Advance to the next char.
+        pos++;
+        continue;
+      }
+
+      newpath.erase(pos, 2);
+
+      // Whoops -- the entire string was just "/."
+      if (newpath.empty()) {
+        newpath.append("/");
+      }
+    }
+
+    // Reset as we have modified the string and might need to go again over it
+    pos = 0;
+  }
+
+  if (newpath.empty()) {
+    /* Everything has been consumed. For an absolute original (such as
+     * "/foo/..") the result is the root. For a relative original (a
+     * one-element relative path with .. appended) we cannot know what
+     * to set without referring to pwd(), which is external access and
+     * forbidden for this method, so we do the one sane thing and just
+     * use ".". */
+    if (!m_path.empty() && m_path[0] == '/')
+      newpath = "/";
+    else
+      newpath = ".";
+  }
+#ifdef _WIN32
+  // "X:/.", "X:/.." or "X:/foo/.." were reduced to the bare drive "X:";
+  // give the drive root its slash back. Only done if the original really
+  // was a drive-absolute path ("X:/...").
+  else if (newpath.length() == 2 && newpath[1] == ':'
+           && m_path.length() > 2 && m_path[2] == '/') {
+    newpath.append("/");
+  }
+#endif
+
+  return Path(newpath);
+}
+
+/**
+ * \note Under specific circumstances (see below), this method
+ * accesses the file system.
+ *
+ * This method creates an absolute path by use of prune(), but
+ * additionally expands any expandable strings. If one of the
+ * following substitution sequences are encountered, it will be
+ * replaced accordingly.
+ *
+ * "~" is expanded to the user’s home directory, see home().
+ *
+ * \returns a new instance with everything expanded.
+ *
+ * \remark This method uses prune() to expand ".." entries, therefore
+ * it will not consider symbolic links when resolving those. Use
+ * real() if you need to do that.
+ *
+ * \see prune() real()
+ */
+Path Path::expand() const
+{
+  Path expanded(*this); // copy
+
+  // Anything not starting with a tilde is made absolute first.
+  if (m_path[0] != '~')
+    expanded = expanded.absolute();
+
+  std::string text = expanded.str();
+
+  // No tilde in front: only pruning is left to do.
+  if (text[0] != '~')
+    return expanded.prune();
+
+  Path homepath = home();
+
+  // Only a lone "~" or a leading "~/" requests the user's home.
+  const bool bare_tilde = (text.length() == 1);
+  if (bare_tilde || text[1] == '/')
+    text.replace(0, 1, homepath.m_path);
+
+  expanded = Path(text);
+  return expanded.prune();
+}
+
+/**
+ * \note This method accesses the filesystem.
+ *
+ * This is the bruteforce method for determining the real path
+ * of the entry in question on the filesystem. It looks on
+ * each single component of the path, checks if it is a
+ * symbolic link, and if so, resolves it.
+ *
+ * This method supports symbolic link resolving only on UNIX.
+ * On Windows the symlink resolution is currently disabled, so
+ * the path is only taken apart with burst() and put together
+ * again.
+ *
+ * It still does not consider hardlinks, mountpoints, and junctions,
+ * though. However, a hardlink is a real second valid name for an
+ * object; in contrast to a symbolic link, if one hardlink gets
+ * removed, the other one stays still valid. If you remove the file a
+ * symbolic link points to, the link breaks. Thus, it is not even
+ * possible to determine which of two hardlinks to a file is the
+ * "primary" one. Mountpoints and junctions (junctions are on Windows
+ * what mountpoints are on UNIX) behave similar with respect to
+ * entire directory hierarchies.
+ *
+ * \see expand() prune()
+ */
+Path Path::real() const
+{
+#if defined(_PATHIE_UNIX)
+  std::string nstr = native();
+  char path[PATH_MAX];
+  if (!realpath(nstr.c_str(), path))
+    throw(Pathie::ErrnoError(errno));
+
+  return Path(filename_to_utf8(path));
+#elif defined(_WIN32)
+  // On Windows there sadly is no easy way to do this. We can
+  // only determine if a given path is a symlink and resolve it...
+  // Instructions taken from: http://msdn.microsoft.com/en-us/library/windows/desktop/aa363940%28v=vs.85%29.aspx
+  std::vector<Path> components = burst();
+  unsigned int pos = 0;
+
+  // With the symlink check below commented out, this loop only walks
+  // over the components without changing them.
+  while (pos < components.size()) {
+    // Build path consisting of all elements upto our position pointer
+    Path reduced_path(components.front());
+    if (components.size() - pos > 1) {
+      for (unsigned int i=1; i <= pos; i++) { // i=0 is already in the initialization above
+        reduced_path = reduced_path.join(components[i]);
+      }
+    }
+
+    // If that’s a symlink, resolve it and replace our path until
+    // the symlink with the symlink’s target.
+    /*std::wstring reduced_path_utf16 = utf8_to_utf16(reduced_path.m_path);
+    if (is_ntfs_symlink(reduced_path_utf16.c_str())) {
+      wchar_t* target_utf16 = read_ntfs_symlink(reduced_path_utf16.c_str());
+      Path target(utf16_to_utf8(target_utf16));
+      std::vector<Path> target_components = target.burst();
+      free(target_utf16);
+
+      // Replace all components up to pos with the symlink target
+      components.erase(components.begin(), components.begin() + pos);
+      std::vector<Path> temp(components);
+      components.clear();
+      for(auto iter=target_components.begin(); iter != target_components.end(); iter++)
+        components.push_back(*iter);
+      for(auto iter=temp.begin(); iter != temp.end(); iter++)
+        components.push_back(*iter);
+    }
+    else {*/
+      // Note a symlink can point to another symlink, so we can only
+      // advance to the next element if this element has been tested
+      // for not being a symlink.
+      pos++;
+      //}
+  }
+
+  // Build a new path from the now resolved components. The first
+  // component seeds the result, so joining starts at the second one;
+  // starting at begin() would add the first component twice.
+  Path result(components.front());
+  for(std::vector<Path>::const_iterator iter=components.begin() + 1;
+      iter != components.end(); iter++) {
+    result = result.join(*iter);
+  }
+
+  return result;
+#else
+#error Unsupported system.
+#endif
+}
+
+// Msys2 does currently not have ntifs.h windows header, which
+// is required for reading NTFS symlinks.
+#if 0
+//#ifdef __WIN32
+/*
+ * Checking if a file is a symlink under Windows is insane.
+ * See http://msdn.microsoft.com/en-us/library/windows/desktop/aa363940%28v=vs.85%29.aspx
+ * for the detailed instructions by Microsoft on how to do
+ * that.
+ */
+bool Path::is_ntfs_symlink(const wchar_t* path) const
+{
+  // First we need to obtain the file attributes.
+  DWORD attrs = GetFileAttributesW(path);
+  if (attrs == INVALID_FILE_ATTRIBUTES) {
+    DWORD err = GetLastError();
+    throw(Pathie::WindowsError(err));
+  }
+
+  /* These file attributes must contain the REPARSE_POINT attribute
+   * that marks the file as being symlink, junction, or similar.
+   * Without it this is a regular file. */
+  if (!(attrs & FILE_ATTRIBUTE_REPARSE_POINT))
+    return false;
+
+  // Now we have to retrieve the find data, which carries the reparse tag.
+  WIN32_FIND_DATAW finddata;
+  HANDLE findhandle = FindFirstFileW(path, &finddata);
+  if (findhandle == INVALID_HANDLE_VALUE) {
+    DWORD err = GetLastError();
+    throw(Pathie::WindowsError(err));
+  }
+  FindClose(findhandle);
+
+  // dwReserved0 holds the reparse tag as a whole value, not a set of
+  // flags. It has to be compared for equality: a bitwise AND also
+  // matches other tags that merely share bits with the symlink tag
+  // (the junction tag does). Junctions and the like are not resolved.
+  return finddata.dwReserved0 == IO_REPARSE_TAG_SYMLINK;
+}
+
+/*
+ * Reading the link target also is insanely hard.
+ * The process is documented at http://msdn.microsoft.com/en-us/library/windows/desktop/aa365503%28v=vs.85%29.aspx
+ * in general. The key function is DeviceIoControl(), documented
+ * at http://msdn.microsoft.com/en-us/library/windows/desktop/aa363216%28v=vs.85%29.aspx
+ * .
+ *
+ * This function does not check if `path` is a symlink, but assumes it.
+ * It will exhibit unexpactable behaviour if this assumption is wrong.
+ *
+ * The returned pointer must be freed by you.
+ */
+wchar_t* Path::read_ntfs_symlink(const wchar_t* path) const
+{
+  // We have to open the file (directories are files on Windows also) first.
+  HANDLE filehandle = CreateFileW(path, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_FLAG_OPEN_REPARSE_POINT, NULL);
+  if (filehandle == INVALID_HANDLE_VALUE) {
+    DWORD err = GetLastError();
+    throw(Pathie::WindowsError(err));
+  }
+
+  // This infamous structure is documented here: http://msdn.microsoft.com/en-us/library/ff552012.aspx
+  unsigned long reparsebufsize = REPARSE_GUID_DATA_BUFFER_HEADER_SIZE; // According to docs this is the minimum size
+  REPARSE_DATA_BUFFER* p_reparse_data = NULL;
+  while (true) {
+    reparsebufsize += 4096; // Do you have a better guess?
+
+    // Grow via a temporary so the old block can still be released
+    // if the reallocation fails.
+    void* p_grown = realloc(p_reparse_data, reparsebufsize);
+    if (!p_grown) {
+      free(p_reparse_data);
+      CloseHandle(filehandle);
+      throw(std::runtime_error("Out of memory while reading reparse point."));
+    }
+    p_reparse_data = (REPARSE_DATA_BUFFER*) p_grown;
+    memset(p_reparse_data, '\0', reparsebufsize);
+
+    DWORD bytecount = 0;
+    // Obtain the reparse tag. FSCTL_GET_REPARSE_POINT is documented here: http://msdn.microsoft.com/en-us/library/windows/desktop/aa364571(v=vs.85).aspx
+    if (DeviceIoControl(filehandle, FSCTL_GET_REPARSE_POINT, NULL, 0, p_reparse_data, reparsebufsize, &bytecount, NULL) != 0)
+      break; // success
+
+    DWORD errsav = GetLastError();
+    if (errsav != ERROR_INSUFFICIENT_BUFFER) {
+      // Real failure: release everything before reporting it.
+      free(p_reparse_data);
+      CloseHandle(filehandle);
+      throw(Pathie::WindowsError(errsav));
+    }
+    // Buffer was too small, try again with a bigger one.
+  }
+
+  // The handle is not needed anymore, whatever the tag turns out to be.
+  CloseHandle(filehandle);
+
+  // See also http://msdn.microsoft.com/en-us/library/windows/desktop/aa365511(v=vs.85).aspx
+  // And this one: http://www.codeproject.com/Articles/21202/Reparse-Points-in-Vista
+  if (p_reparse_data->ReparseTag != IO_REPARSE_TAG_SYMLINK) {
+    free(p_reparse_data);
+    return NULL;
+  }
+
+  // PrintNameOffset and PrintNameLength are given in bytes, whereas
+  // PathBuffer is an array of WCHAR; convert before indexing/copying.
+  const size_t name_bytes = p_reparse_data->SymbolicLinkReparseBuffer.PrintNameLength;
+  const size_t name_start = p_reparse_data->SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(WCHAR);
+
+  wchar_t* printname = (wchar_t*) malloc(name_bytes + sizeof(wchar_t)); // room for the UTF-16 NUL
+  if (!printname) {
+    free(p_reparse_data);
+    throw(std::runtime_error("Out of memory while reading reparse point."));
+  }
+
+  memset(printname, '\0', name_bytes + sizeof(wchar_t));
+  wcsncpy(printname, &p_reparse_data->SymbolicLinkReparseBuffer.PathBuffer[name_start], name_bytes / sizeof(WCHAR));
+
+  free(p_reparse_data);
+  return printname;
+}
+#endif
+
+///@}
+
+/** \name Special files and directories
+ *
+ * Files and directories with a special meaning that did not
+ * fit in the other groups.
+ */
+///@{
+
+/**
+ * Determines the current process working directory and returns
+ * it as an absolute path. Contains a leading drive letter on
+ * Windows.
+ */
+Path Path::pwd()
+{
+#if defined(_PATHIE_UNIX)
+  char cwd[PATH_MAX];
+
+  // Bail out first, then convert the directory getcwd() reported.
+  if (getcwd(cwd, PATH_MAX) == NULL)
+    throw(std::runtime_error("Failed to retrieve current working directory."));
+
+  return Path(filename_to_utf8(cwd));
+#elif defined(_WIN32)
+  wchar_t cwd[MAX_PATH];
+
+  // A return value of zero is treated as failure.
+  if (GetCurrentDirectoryW(MAX_PATH, cwd) == 0)
+    throw(std::runtime_error("Failed to retrieve current working directory."));
+
+  return Path(utf16_to_utf8(std::wstring(cwd)));
+#else
+#error Unsupported platform.
+#endif
+}
+
+/**
+ * \note On Linux, this method accesses the `/proc` filesystem.
+ *
+ * This method returns the full absolute path to the currently running
+ * executable.
+ */
+Path Path::exe()
+{
+#if defined(__linux__)
+  // The symlink /proc/self/exe is read to find the running binary.
+  char target[PATH_MAX];
+  const ssize_t length = ::readlink("/proc/self/exe", target, PATH_MAX);
+
+  if (length < 0)
+    throw(Pathie::ErrnoError(errno));
+
+  // The string is built from an explicit length rather than a terminator.
+  return Path(filename_to_utf8(std::string(target, length)));
+#elif defined(BSD)
+  // BSD does not have /proc mounted by default. However, using raw syscalls,
+  // we can figure out what would have been in /proc/curproc/file. See
+  // sysctl(3) for the management info base identifiers that are used here.
+  int mib[4];
+  mib[0] = CTL_KERN;
+  mib[1] = KERN_PROC;
+  mib[2] = KERN_PROC_PATHNAME;
+  mib[3] = -1; // According to sysctl(3), -1 means the current process.
+
+  char target[PATH_MAX];
+  size_t length = PATH_MAX;
+
+  // Note this changes `length' to the number of chars copied
+  if (sysctl(mib, 4, target, &length, NULL, 0) != 0)
+    throw(Pathie::ErrnoError(errno));
+
+  return Path(filename_to_utf8(std::string(target, length - 1))); // Exclude terminating NUL
+#elif defined(_WIN32)
+  wchar_t target[MAX_PATH];
+  if (GetModuleFileNameW(NULL, target, MAX_PATH) == 0) {
+    DWORD err = GetLastError();
+    throw(Pathie::WindowsError(err));
+  }
+
+  return Path(utf16_to_utf8(target));
+#else
+#error Unsupported platform.
+#endif
+}
+
+/**
+ * This method returns the current user’s home directory. On UNIX
+ * systems, the $HOME environment variable is consulted, whereas
+ * on Windows the Windows API is queried for the directory.
+ *
+ * It will throw std::runtime_error if $HOME is not defined on
+ * UNIX.
+ */
+Path Path::home()
+{
+#if defined(_PATHIE_UNIX)
+  // The home directory comes straight from the environment.
+  char* homedir = getenv("HOME");
+  if (!homedir)
+    throw(std::runtime_error("$HOME not defined."));
+
+  return Path(filename_to_utf8(homedir));
+#elif defined(_WIN32)
+  /* TODO: Switch to KNOWNFOLDERID system as explained
+   * on http://msdn.microsoft.com/en-us/library/windows/desktop/bb762494%28v=vs.85%29.aspx
+   * and http://msdn.microsoft.com/en-us/library/windows/desktop/bb762181%28v=vs.85%29.aspx
+   * . However, MinGW does currently (September 2014) not have
+   * the new KNOWNFOLDERID declarations.
+   */
+
+  wchar_t homedir[MAX_PATH];
+  const bool found = SHGetFolderPathW(NULL, CSIDL_PROFILE, NULL, SHGFP_TYPE_CURRENT, homedir) == S_OK;
+  if (!found)
+    throw(std::runtime_error("Home directory not defined."));
+
+  return Path(utf16_to_utf8(homedir));
+#else
+#error Unsupported system.
+#endif
+}
+
+///@}
+
+/** \name Handling of absolute and relative paths
+ *
+ * Converting relative paths to absolute ones and vice-versa.
+ */
+///@{
+
+/**
+ * Builds an absolute path from the referenced path by
+ * prefixing it with a `base` path, which defaults to
+ * the current working directory. If the referenced path
+ * is absolute already, nothing is done and a copy of the
+ * referenced path is returned.
+ *
+ * \param[in] base Base path. Default is the return value of Path::pwd().
+ *
+ * \returns A new instance that is absolute.
+ *
+ * \see relative()
+ */
+Path Path::absolute(const Path& base /* = Path::pwd() */) const
+{
+  // An absolute path is returned as a copy; a relative one is
+  // appended to `base`.
+  return is_absolute() ? Path(m_path) : base.join(m_path);
+}
+
+/**
+ * The referenced path has to be absolute; by doing pure string
+ * manipulation (read: no symlinks), it will then be determined how to
+ * go from the (also absolute) `base` path to the referenced path. The
+ * result is a relative path, which will be returned by this method.
+ *
+ * The two paths are compared component by component, so a directory
+ * name that merely starts with the same characters as another one
+ * (`/tmp/foo` vs. `/tmp/foobar`) is not mistaken for a common parent.
+ *
+ * On Windows, this method will throw an std::invalid_argument if the `base`
+ * is on a different drive than the referenced path. If either the referenced
+ * or the passed path is relative, std::invalid_argument will also be thrown.
+ *
+ * \param base Base path from which to start. Must also be absolute.
+ *
+ * \returns A new instance as a relative path.
+ *
+ * Example:
+ *
+ * ~~~~~~~~~~~~~~~~~~~~ c++
+ * Path p1("/tmp/foo/bar/baz");
+ * Path p2("/tmp/xxx/yyy");
+ *
+ * p1.relative(p2); // => ../../foo/bar/baz
+ * p2.relative(p1); // => ../../../xxx/yyy
+ * ~~~~~~~~~~~~~~~~~~~~
+ *
+ * \remark Both the referenced path and the `base` argument
+ * are prune()d before they are worked with.
+ *
+ * \see absolute()
+ */
+Path Path::relative(Path base) const
+{
+  if (is_relative())
+    throw(std::invalid_argument("Referenced path must be absolute."));
+  if (base.is_relative())
+    throw(std::invalid_argument("Argument path must be absolute."));
+
+  // Wipe all ".." and ".", this would break the below algorithm
+  base = base.prune();
+  Path refpath = prune();
+
+  // Shortcut for equal paths
+  if (base.m_path == refpath.m_path)
+    return Path(".");
+
+  // Shortcut if base is the root
+  if (base.is_root()) {
+#if defined(_PATHIE_UNIX)
+    return Path(refpath.m_path.substr(1)); // Skip leading /
+#elif defined(_WIN32)
+    return Path(refpath.m_path.substr(root().m_path.length())); // Skip leading / or X:/
+#else
+#error Unsupported system.
+#endif
+  }
+
+  // Compare whole components rather than single characters.
+  const std::vector<Path> basecomps = base.burst();
+  const std::vector<Path> refcomps  = refpath.burst();
+
+  size_t common = 0;
+  while (common < basecomps.size()
+         && common < refcomps.size()
+         && basecomps[common].m_path == refcomps[common].m_path) {
+    common++;
+  }
+
+  // Not even the first component matches: the paths live under
+  // different roots (different drives on Windows), so there is no
+  // relative way from one to the other.
+  if (common == 0)
+    throw(std::invalid_argument("Paths do not share a common root."));
+
+  std::string resultstr;
+
+  // For each component in base that is not part of refpath, add a "..".
+  for (size_t i = common; i < basecomps.size(); i++) {
+    if (basecomps[i].m_path.empty()) // burst() may yield an empty last component
+      continue;
+    if (!resultstr.empty())
+      resultstr.append("/");
+    resultstr.append("..");
+  }
+
+  // Now append the part of refpath that is not part of base to the result.
+  for (size_t i = common; i < refcomps.size(); i++) {
+    if (refcomps[i].m_path.empty())
+      continue;
+    if (!resultstr.empty())
+      resultstr.append("/");
+    resultstr.append(refcomps[i].m_path);
+  }
+
+  // Nothing left on either side means both paths denote the same place.
+  if (resultstr.empty())
+    resultstr = ".";
+
+  // Done.
+  return Path(resultstr);
+}
+
+/**
+ * Checks if this is an absolute path, i.e. one that
+ * starts with a / on all systems or with X:/
+ * only on Windows, where `X` is a drive letter.
+ *
+ * Note that / on Windows is the root of the current drive
+ * and hence also an absolute path.
+ */
+bool Path::is_absolute() const
+{
+#if defined(_PATHIE_UNIX)
+  // Absolute means: starts with the root slash.
+  return !m_path.empty() && m_path[0] == '/';
+#elif defined(_WIN32)
+  // / is root on current drive
+  if (!m_path.empty() && m_path[0] == '/')
+    return true;
+
+  // Index 1 is the only position where : is allowed on Windows, and if
+  // it is there, the path is absolute with a drive letter (X:/). The
+  // length check keeps the index in range for empty paths.
+  return m_path.length() > 1 && m_path[1] == ':';
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * The inverse of is_absolute().
+ */
+bool Path::is_relative() const
+{
+  // Relative is simply "not absolute".
+  const bool absolute = is_absolute();
+  return !absolute;
+}
+
+/**
+ * Checks if this path is a filesystem root. On UNIX, this
+ * is the case if the path consists solely of one slash, on
+ * Windows this is the case if the path looks like this:
+ * "<letter>:/".
+ */
+bool Path::is_root() const
+{
+#if defined(_PATHIE_UNIX)
+  // The root is exactly the single slash.
+  return m_path == "/";
+#elif defined(_WIN32)
+  // / on Windows is root on current drive
+  if (m_path == "/")
+    return true;
+
+  // X:/ is root including drive letter: three characters with the
+  // colon in the middle.
+  const bool three_chars = (m_path.length() == 3);
+  return three_chars && m_path[1] == ':';
+#else
+#error Unsupported platform.
+#endif
+}
+
+///@}
+
+/** \name In-place substitution
+ *
+ * These methods change the underlying path string.
+ */
+///@{
+
+void Path::assign(std::string str)
+{
+  // Replace the stored path string with the given one, as is.
+  m_path.assign(str);
+}
+
+void Path::swap(Path& path) throw()
+{
+  // Exchange the two underlying path strings.
+  path.m_path.swap(m_path);
+}
+
+///@}
+
+/** \name File attributes
+ *
+ * Functions that work on file attributes like timestamps.
+ */
+///@{
+
+#if defined(_PATHIE_UNIX)
+struct stat* Path::stat() const
+{
+  // Convert the path before allocating, so nothing is leaked if the
+  // conversion throws.
+  std::string nstr = native();
+
+  struct stat* s = (struct stat*) malloc(sizeof(struct stat));
+  if (!s)
+    throw(Pathie::ErrnoError(errno));
+
+  if (::stat(nstr.c_str(), s) < 0) {
+    int errsav = errno; // Save before free() gets a chance to touch it
+    free(s);            // The caller never sees the pointer, release it here
+    throw(Pathie::ErrnoError(errsav));
+  }
+
+  return s;
+}
+#elif defined(_WIN32)
+/**
+ * \note This method accesses the file system.
+ *
+ * Returns a pointer to a C `stat` struct that describes the
+ * given file. You have to free() the pointer manually yourself.
+ * If the file cannot be examined, an ErrnoError is thrown and
+ * nothing needs to be freed.
+ *
+ * \returns A `struct stat` pointer on UNIX, and a `struct _stat`
+ * pointer on Windows.
+ */
+struct _stat* Path::stat() const
+{
+  // Convert the path before allocating, so nothing is leaked if the
+  // conversion throws.
+  std::wstring utf16 = utf8_to_utf16(m_path);
+
+  struct _stat* s = (struct _stat*) malloc(sizeof(struct _stat));
+  if (!s)
+    throw(Pathie::ErrnoError(errno));
+
+  if (_wstat(utf16.c_str(), s) < 0) {
+    int errsav = errno; // Save before free() gets a chance to touch it
+    free(s);            // The caller never sees the pointer, release it here
+    throw(Pathie::ErrnoError(errsav));
+  }
+
+  return s;
+}
+#else
+#error Unsupported system.
+#endif
+
+/**
+ * \note This method accesses the file system.
+ *
+ * Returns the file size.
+ */
+long Path::size() const
+{
+  // Query the file status with the platform's stat call and report
+  // its size field.
+#if defined(_PATHIE_UNIX)
+  struct stat info;
+  const std::string native_path = native();
+
+  if (::stat(native_path.c_str(), &info) < 0)
+    throw(Pathie::ErrnoError(errno));
+#elif defined(_WIN32)
+  struct _stat info;
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+
+  if (_wstat(wide_path.c_str(), &info) < 0)
+    throw(Pathie::ErrnoError(errno));
+#else
+#error Unsupported system.
+#endif
+
+  return info.st_size;
+}
+
+/**
+ * \note This method accesses the file system.
+ *
+ * Returns the file’s last access time. The value is not
+ * really reliable.
+ */
+time_t Path::atime() const
+{
+  // Query the file status with the platform's stat call and report
+  // its access time field.
+#if defined(_PATHIE_UNIX)
+  struct stat info;
+  const std::string native_path = native();
+
+  if (::stat(native_path.c_str(), &info) < 0)
+    throw(Pathie::ErrnoError(errno));
+#elif defined(_WIN32)
+  struct _stat info;
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+
+  if (_wstat(wide_path.c_str(), &info) < 0)
+    throw(Pathie::ErrnoError(errno));
+#else
+#error Unsupported system.
+#endif
+
+  return info.st_atime;
+}
+
+/**
+ * \note This method accesses the file system.
+ *
+ * Returns the file’s last modification time.
+ */
+time_t Path::mtime() const
+{
+  // Query the file status with the platform's stat call and report
+  // its modification time field.
+#if defined(_PATHIE_UNIX)
+  struct stat info;
+  const std::string native_path = native();
+
+  if (::stat(native_path.c_str(), &info) < 0)
+    throw(Pathie::ErrnoError(errno));
+#elif defined(_WIN32)
+  struct _stat info;
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+
+  if (_wstat(wide_path.c_str(), &info) < 0)
+    throw(Pathie::ErrnoError(errno));
+#else
+#error Unsupported system.
+#endif
+
+  return info.st_mtime;
+}
+
+/**
+ * \note This method accesses the file system.
+ *
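+ * Returns the file’s `st_ctime` value. On Windows this is the
+ * creation time; on UNIX it is the time of the last status
+ * (inode) change, not the creation time.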
+ */
+time_t Path::ctime() const
+{
+  // Query the file status with the platform's stat call and report
+  // its st_ctime field.
+#if defined(_PATHIE_UNIX)
+  struct stat info;
+  const std::string native_path = native();
+
+  if (::stat(native_path.c_str(), &info) < 0)
+    throw(Pathie::ErrnoError(errno));
+#elif defined(_WIN32)
+  struct _stat info;
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+
+  if (_wstat(wide_path.c_str(), &info) < 0)
+    throw(Pathie::ErrnoError(errno));
+#else
+#error Unsupported system.
+#endif
+
+  return info.st_ctime;
+}
+
+///@}
+
+/** \name Path traversal
+ *
+ * What’s in this directory?
+ */
+///@{
+
+/**
+ * Returns an entry_iterator instance you can use to iterate
+ * the entries in a directory. Note that the list somewhere
+ * always includes the "." (current directory) and ".."
+ * (parent directory) entries.
+ */
+entry_iterator Path::begin_entries() const
+{
+  // The iterator is constructed from a pointer to this Path.
+  // NOTE(review): presumably the iterator keeps that pointer, in which
+  // case this Path has to outlive it -- confirm against entry_iterator.
+  return entry_iterator(this);
+}
+
+/**
+ * Returns the terminal iterator you test for in order to
+ * find out whether the iteration is complete.
+ */
+entry_iterator Path::end_entries() const
+{
+  // A default-constructed entry_iterator serves as the terminal iterator.
+  return entry_iterator();
+}
+
+/**
+ * \note This method accesses the file system.
+ *
+ * This method assumes the path is a directory and returns
+ * a list of all entries in it. The items in the list follow
+ * the order of the items on the file system, i.e. for most
+ * applications they are to be considered unsorted.
+ *
+ * \see children()
+ */
+std::vector<Path> Path::entries() const
+{
+  std::vector<Path> listing;
+
+  // Collect every entry the iterator yields, in iteration order.
+  entry_iterator iter = begin_entries();
+  while (iter != end_entries()) {
+    listing.push_back(*iter);
+    iter++;
+  }
+
+  return listing;
+}
+
+/**
+ * \note This method accesses the file system.
+ *
+ * This method assumes the path is a directory and returns
+ * a list of all its children. Children are all entries
+ * in the directory *except* for the entries for the directory
+ * itself and its parent directory.
+ *
+ * Or for short, this method is the same as entries() except
+ * the return value does not include the "." and ".." entries.
+ *
+ * \see entries()
+ */
+std::vector<Path> Path::children() const
+{
+  std::vector<Path> kids;
+
+  entry_iterator iter = begin_entries();
+  while (iter != end_entries()) {
+    // Keep everything but the "." and ".." entries.
+    if (*iter != Path(".") && *iter != Path(".."))
+      kids.push_back(*iter);
+
+    iter++;
+  }
+
+  return kids;
+}
+
+/**
+ * \note This method accesses the file system.
+ *
+ * Recursively traverse the directory structure below the referenced
+ * path. Each entry will be passed to the callback while traversing
+ * from top to bottom. If the entry passed is a directory, you can return
+ * true if you want to traverse that directory down or false if you
+ * don't want to. If the entry passed is not a directory, the
+ * callback's return value is ignored.
+ *
+ * The callback will never be passed "." and ".." entries. All paths
+ * passed to the callback retain the full prefix, i.e. if you
+ * have this structure:
+ *
+ * ~~~~~~~~~~~~~~~~
+ * foo
+ *   bar/
+ *     baz.txt
+ * ~~~~~~~~~~~~~~~~
+ *
+ * Then find() will give you these paths in this order: `foo`,
+ * `foo/bar`, and `foo/bar/baz.txt`, rather than just the sole
+ * basename (which you can still obtain by calling basename() on the
+ * argument).
+ *
+ * \param cb Callback that takes the currently examined path.
+ *
+ * \remark Do not assume any order for the paths you receive,
+ * except that you will be given a directory entry before you
+ * are given its child entries.
+ */
+void Path::find(bool (*cb)(const Path& entry)) const
+{
+  entry_iterator cursor = begin_entries();
+  while (cursor != end_entries()) {
+    // "." and ".." are never reported to the callback.
+    const bool is_pseudo_entry = (cursor->str() == "." || cursor->str() == "..");
+
+    if (!is_pseudo_entry) {
+      // Report the entry with this path prepended, not just its basename.
+      Path full = join(*cursor);
+
+      // The callback is always invoked; its answer only matters for
+      // directories, where `true` means "descend into it".
+      if (cb(full)) {
+        if (full.is_directory())
+          full.find(cb);
+      }
+    }
+
+    cursor++;
+  }
+}
+
+///@}
+
+/** \name Path status information
+ *
+ * Query information on the path.
+ */
+///@{
+
+
+/**
+ * \note This method accesses the filesystem.
+ *
+ * Checks if the file exists. Note that if you don’t have
+ * sufficient rights for the check on the given path, this
+ * method will throw an exception.
+ */
+bool Path::exists() const
+{
+  // Run the platform's access check; both variants report failure as -1
+  // and leave the reason in errno.
+#if defined(_PATHIE_UNIX)
+  const std::string native_path = native();
+  const int rc = access(native_path.c_str(), F_OK);
+#elif defined(_WIN32)
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+  const int rc = _waccess(wide_path.c_str(), F_OK);
+#else
+#error Unsupported system.
+#endif
+
+  if (rc != -1)
+    return true;
+
+  // Only "no such file or directory" means "does not exist"; every
+  // other failure (e.g. missing permissions) is reported as an error.
+  const int saved_errno = errno;
+  if (saved_errno != ENOENT)
+    throw(Pathie::ErrnoError(saved_errno));
+
+  return false;
+}
+
+/**
+ * \note This method accesses the filesystem.
+ *
+ * Checks if this file is a symbolic link. On Windows,
+ * detection of NTFS symlinks is currently disabled and this
+ * method always returns false. Returns false rather than
+ * erroring out if the referenced path does not exist.
+ */
+bool Path::is_symlink() const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string native_path = native();
+  struct stat info;
+
+  // lstat() inspects the link itself instead of the file it points to.
+  if (lstat(native_path.c_str(), &info) < 0) {
+    const int saved_errno = errno;
+
+    if (saved_errno != ENOENT)
+      throw(Pathie::ErrnoError(saved_errno));
+
+    // A path that does not exist is simply not a symlink.
+    return false;
+  }
+
+  return S_ISLNK(info.st_mode) ? true : false;
+#elif defined(_WIN32)
+  // The existence check is kept because exists() throws on errors other
+  // than "not found".
+  if (!exists())
+    return false;
+
+  // NTFS symlink detection (is_ntfs_symlink() on the UTF-16 path) is
+  // disabled because ntifs.h is currently not included in msys2, so the
+  // answer is always "no".
+  return false;
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note This method accesses the filesystem.
+ *
+ * Checks if this is a directory. Returns false if the
+ * referenced path does not exist rather than erroring out.
+ */
+bool Path::is_directory() const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string native_path = native();
+  struct stat info;
+
+  if (::stat(native_path.c_str(), &info) < 0) {
+    const int saved_errno = errno;
+
+    // Anything other than "not found" is a real error.
+    if (saved_errno != ENOENT)
+      throw(Pathie::ErrnoError(saved_errno));
+
+    // "Not found" means it isn't a directory.
+    return false;
+  }
+
+  return S_ISDIR(info.st_mode) ? true : false;
+#elif defined(_WIN32)
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+  struct _stat info;
+
+  if (_wstat(wide_path.c_str(), &info) < 0) {
+    const int saved_errno = errno;
+
+    if (saved_errno != ENOENT)
+      throw(Pathie::ErrnoError(saved_errno));
+
+    return false;
+  }
+
+  // Test the directory bit of the mode field.
+  return (info.st_mode & S_IFDIR) != 0;
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note This method accesses the filesystem.
+ *
+ * Checks if this is a file. Returns false
+ * if the referenced path does not exist rather
+ * than erroring out.
+ */
+bool Path::is_file() const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string native_path = native();
+  struct stat info;
+
+  if (::stat(native_path.c_str(), &info) < 0) {
+    const int saved_errno = errno;
+
+    // Anything other than "not found" is a real error.
+    if (saved_errno != ENOENT)
+      throw(Pathie::ErrnoError(saved_errno));
+
+    // A path that does not exist is not a regular file.
+    return false;
+  }
+
+  return S_ISREG(info.st_mode) ? true : false;
+#elif defined(_WIN32)
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+  struct _stat info;
+
+  if (_wstat(wide_path.c_str(), &info) < 0) {
+    const int saved_errno = errno;
+
+    if (saved_errno != ENOENT)
+      throw(Pathie::ErrnoError(saved_errno));
+
+    return false;
+  }
+
+  // Test the regular-file bit of the mode field.
+  return (info.st_mode & S_IFREG) != 0;
+#else
+#error Unsupported system.
+#endif
+}
+
+///@}
+
+/** \name Utility methods
+ *
+ * These methods operate on the file or directory referenced
+ * by the path.
+ */
+/// @{
+
+/**
+ * \note This method writes to the filesystem.
+ *
+ * Creates the referenced directory non-recursively,
+ * i.e. parent directories are not created. Trying
+ * to create a directory below a nonexistent directory
+ * will result in an ErrnoError exception.
+ *
+ * \remark UNIX note: The directory is created with RWX permissions
+ * for everyone, but filtered by your current `umask` before applied
+ * to disk.
+ *
+ * \see mktree()
+ */
+void Path::mkdir() const
+{
+#if defined(_PATHIE_UNIX)
+  // Request read/write/execute for user, group and others.
+  const mode_t all_rwx = S_IRWXU | S_IRWXG | S_IRWXO;
+  const std::string native_path = native();
+  const int rc = ::mkdir(native_path.c_str(), all_rwx);
+#elif defined(_WIN32)
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+  const int rc = _wmkdir(wide_path.c_str());
+#else
+#error Unsupported system.
+#endif
+
+  // Both variants signal failure with a negative result and errno.
+  if (rc < 0)
+    throw(Pathie::ErrnoError(errno));
+}
+
+/**
+ * \note This method writes to the filesystem.
+ *
+ * Deletes the referenced directory, which is required
+ * to be empty, if not, an ErrnoError will be thrown.
+ *
+ * This cannot be used to delete a file rather than a
+ * directory.
+ *
+ * \see remove() unlink()
+ */
+void Path::rmdir() const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string native_path = native();
+  const int rc = ::rmdir(native_path.c_str());
+#elif defined(_WIN32)
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+  const int rc = _wrmdir(wide_path.c_str());
+#else
+#error Unsupported system.
+#endif
+
+  // Both variants signal failure with a negative result and errno.
+  if (rc < 0)
+    throw(Pathie::ErrnoError(errno));
+}
+
+/**
+ * \note This method writes to the filesystem.
+ *
+ * Deletes the referenced file. This cannot be used to
+ * delete a directory rather than a file.
+ *
+ * \see remove() rmdir()
+ */
+void Path::unlink() const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string native_path = native();
+  const int rc = ::unlink(native_path.c_str());
+#elif defined(_WIN32)
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+  const int rc = _wunlink(wide_path.c_str());
+#else
+#error Unsupported system.
+#endif
+
+  // Both variants signal failure with a negative result and errno.
+  if (rc < 0)
+    throw(Pathie::ErrnoError(errno));
+}
+
+/**
+ * \note This method writes to the filesystem.
+ *
+ * Delete this path, regardless of whether it is a file
+ * or an empty directory. This method can’t be used to
+ * delete a directory that isn’t empty.
+ *
+ * \see rmdir() unlink()
+ */
+void Path::remove() const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string native_path = native();
+  const int rc = ::remove(native_path.c_str());
+
+  if (rc < 0)
+    throw(Pathie::ErrnoError(errno));
+#elif defined(_WIN32)
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+
+  /* `_wremove()` does not work on directories on Windows, so the
+   * appropriate native Win32 API call is chosen by entry kind. */
+  bool removed = false;
+  if (is_directory())
+    removed = RemoveDirectoryW(wide_path.c_str()) != 0;
+  else
+    removed = DeleteFileW(wide_path.c_str()) != 0;
+
+  if (!removed)
+    throw(Pathie::WindowsError(GetLastError()));
+
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note This method writes to the file system.
+ *
+ * This method provides a functionality akin to the UNIX `mkdir -p`
+ * command, i.e. it creates the referenced directory, and if necessary,
+ * also creates all parent directories. Note this method does not
+ * throw an ErrnoError if the referenced directory already exists;
+ * it just does nothing.
+ *
+ * \see mkdir()
+ */
+void Path::mktree() const
+{
+  // The root directory is required to exist; nothing to create.
+  if (is_root())
+    return;
+
+  // Already present as a directory: nothing to do.
+  if (is_directory())
+    return;
+
+  // Make sure the whole chain of ancestors exists before creating
+  // this directory itself.
+  Path ancestor = parent();
+  if (!ancestor.is_directory())
+    ancestor.mktree();
+
+  mkdir();
+}
+
+/**
+ * \note This method accesses the filesystem.
+ *
+ * Open the referenced path as a file with the given mode.
+ * Refer to your preferred C documentation for the value
+ * of the `mode` parameter.
+ *
+ * As with all methods of this library, Unicode filenames
+ * are handled properly on both UNIX and Windows by transcoding
+ * to UTF-16LE on Windows. Therefore, on UNIX the file
+ * is opened using `fopen()`, and on Windows it is opened
+ * using `_wfopen()`. Thankfully, as an exception
+ * to Microsoft’s wchar-them-all rule, it is possible to close
+ * a file that is opened with `_wfopen()` by means of the
+ * regular `fclose()` function, which saves me from implementing
+ * a wrapper around the C `FILE*` pointer to abstract the problem.
+ *
+ * In contrast to original `fopen()`, this method throws an
+ * ErrnoError exception if the call fails, i.e. if `fopen()`
+ * returns NULL. As a result, this method will _never_ return
+ * a NULL pointer.
+ *
+ * Here’s an example of how to use this method (with error checking
+ * omitted):
+ *
+ * ~~~~~~~~~~~~~~~~~ c++
+ * Path p("Unicöde file.txt");
+ * FILE* p_file = p.fopen("w");
+ * fwrite("A", 1, 1, p_file);
+ * fclose(p_file);
+ * ~~~~~~~~~~~~~~~~~
+ *
+ * This will create a file named "Unicöde file.txt" both on UNIX and Windows.
+ *
+ * \param[in] mode File open mode as per the C `fopen()` documentation.
+ *
+ * \remark Don’t forget you have to close the file using `fclose()`, which
+ * works, as explained, both on UNIX and Windows. `fclose()` is
+ * not wrapped by this library, use your C libraries’ implementation
+ * directly.
+ *
+ * \remark The file’s actual _contents_ are not affected in any way
+ * by this method. They are outside the scope of this library; note
+ * however that with regard to line endings you might want to consider
+ * the "b" mode modifier for binary files.
+ *
+ * \see [Microsoft’s documentation on `fopen()` and `_wfopen()`](http://msdn.microsoft.com/en-us/library/yeby3zcb.aspx)
+ */
+FILE* Path::fopen(const char* mode) const
+{
+  // Open the file with the platform's native-encoding variant of fopen().
+#if defined(_PATHIE_UNIX)
+  const std::string native_path = native();
+  FILE* handle = ::fopen(native_path.c_str(), mode);
+#elif defined(_WIN32)
+  // Both the path and the mode string must be passed as UTF-16.
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+  const std::wstring wide_mode = utf8_to_utf16(mode);
+  FILE* handle = _wfopen(wide_path.c_str(), wide_mode.c_str());
+#else
+#error Unsupported system.
+#endif
+
+  // A NULL handle is never returned to the caller; it becomes an exception.
+  if (!handle)
+    throw(Pathie::ErrnoError(errno));
+
+  return handle;
+}
+
+/**
+ * \note This method writes to the filesystem.
+ *
+ * Sets the file’s modification and access times to the
+ * current time. If the file does not yet exist, it is created.
+ *
+ * This is akin to the UNIX `touch` command.
+ */
+void Path::touch() const
+{
+#if defined(BSD) || defined(_PATHIE_UNIX)
+  // Opening in append mode creates the file if it does not exist yet
+  // and leaves existing content alone. Throws ErrnoError on failure.
+  FILE* p_file = Path::fopen("a");
+
+#if defined(BSD) // FreeBSD didn’t have futimens() yet as of testing (december 2014)
+  const int rc = futimes(fileno(p_file), NULL);
+#else
+  // futimens() is considered the modern variant of doing this
+  // (at least according to utimes(2) on Linux). A NULL times
+  // argument sets both timestamps to the current time.
+  const int rc = futimens(fileno(p_file), NULL);
+#endif
+
+  // Capture errno *before* fclose(), which may overwrite it and thereby
+  // hide the real reason the timestamp update failed.
+  const int errsav = errno;
+  fclose(p_file);
+
+  if (rc < 0)
+    throw(Pathie::ErrnoError(errsav));
+#elif defined(_WIN32)
+  // Create file if it does not exist yet
+  if (!exists()) {
+    FILE* p_file = Path::fopen("a");
+    fclose(p_file);
+  }
+
+  // Convert the current system time into the FILETIME format
+  // that SetFileTime() expects.
+  SYSTEMTIME currenttime;
+  GetSystemTime(&currenttime);
+
+  FILETIME newtime;
+  if (SystemTimeToFileTime(&currenttime, &newtime) == 0) {
+    DWORD err = GetLastError();
+    throw(Pathie::WindowsError(err));
+  }
+
+  std::wstring utf16 = utf8_to_utf16(m_path);
+  HANDLE filehandle = CreateFileW(utf16.c_str(), FILE_WRITE_ATTRIBUTES, 0, NULL, OPEN_EXISTING, 0, NULL);
+  if (filehandle == INVALID_HANDLE_VALUE) {
+    DWORD err = GetLastError();
+    throw(Pathie::WindowsError(err));
+  }
+
+  // Update access and modification time; the creation time (first
+  // time argument) is left untouched.
+  if (SetFileTime(filehandle, NULL, &newtime, &newtime) == 0) {
+    // Save the error code before CloseHandle() can replace it.
+    DWORD errsav = GetLastError();
+    CloseHandle(filehandle);
+    throw(Pathie::WindowsError(errsav));
+  }
+
+  CloseHandle(filehandle);
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note This method writes to the filesystem.
+ *
+ * This method, which is akin to the UNIX "rm -r" command, removes
+ * the entire referenced directory hierarchy recursively, including
+ * any files and directories contained therein.
+ */
+void Path::rmtree() const
+{
+  // is_directory() follows symbolic links, so a symlink pointing to a
+  // directory must be excluded explicitly: like "rm -r", only the link
+  // itself is removed, never the contents of the directory it targets.
+  if (is_directory() && !is_symlink()) {
+    std::vector<Path> kids = children();
+
+    // children() yields the bare entries; join() prepends this path so
+    // that each child is addressed relative to this directory.
+    for(std::vector<Path>::iterator iter=kids.begin(); iter != kids.end(); iter++) {
+       join(*iter).rmtree();
+    }
+
+    // The directory is empty now and can be removed itself.
+    rmdir();
+  }
+  else { // file, symlink or similar
+    unlink();
+  }
+}
+
+/**
+ * \note This method writes to the filesystem.
+ *
+ * This method makes the referenced file a symbolic link
+ * to the path passed as an argument. On Windows, an
+ * NTFS symlink is created.
+ *
+ * \remark On Windows, this function requires that the process holds
+ * the `SE_CREATE_SYMBOLIC_LINK_NAME` privilege or it will fail with a
+ * WindowsError exception whose error code is 1314
+ * (`ERROR_PRIVILEGE_NOT_HELD`).
+ */
+void Path::make_symlink(const Path& target) const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string link_target = target.native();
+  const std::string link_name = native();
+
+  // symlink() takes the target first and the name of the new link second.
+  if (symlink(link_target.c_str(), link_name.c_str()) < 0)
+    throw(Pathie::ErrnoError(errno));
+#elif defined(_WIN32)
+  const std::wstring link_name = utf8_to_utf16(m_path);
+  const std::wstring link_target = utf8_to_utf16(target.m_path);
+
+  // The API must be told up front when the link refers to a directory.
+  const DWORD flags = target.is_directory() ? SYMBOLIC_LINK_FLAG_DIRECTORY : 0;
+
+  if (CreateSymbolicLinkW(link_name.c_str(), link_target.c_str(), flags) == 0)
+    throw(Pathie::WindowsError(GetLastError()));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note This method accesses the file system.
+ *
+ * Treats the referenced path as a symlink and reads in its target,
+ * returning it as a new Path instance. NTFS symlinks are currently
+ * not supported; on Windows this method always throws std::runtime_error.
+ */
+Path Path::readlink() const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string link_name = native();
+
+  // Zero-filled buffer that receives the link target.
+  char target[PATH_MAX] = {0};
+
+  const ssize_t length = ::readlink(link_name.c_str(), target, PATH_MAX);
+  if (length < 0)
+    throw(Pathie::ErrnoError(errno));
+
+  // Build the string from the reported length instead of relying on a
+  // terminating NUL byte.
+  const std::string raw_target(target, length);
+  return Path(filename_to_utf8(raw_target));
+#elif defined(_WIN32)
+  std::wstring utf16_path = utf8_to_utf16(m_path);
+
+  // Reading NTFS symlinks (is_ntfs_symlink() / read_ntfs_symlink() on
+  // utf16_path) is disabled because ntifs.h is currently not included
+  // in msys2.
+  throw(std::runtime_error("NTFS symlinks currently not supported."));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note This method writes to the file system.
+ *
+ * Renames a file to another name without involving file streams.
+ *
+ * \param[in] newname The new name of the file.
+ */
+void Path::rename(Path& newname) const
+{
+#if defined(_PATHIE_UNIX)
+  const std::string from = native();
+  const std::string to = newname.native();
+  const int rc = ::rename(from.c_str(), to.c_str());
+#elif defined(_WIN32)
+  const std::wstring from = utf8_to_utf16(m_path);
+  const std::wstring to = utf8_to_utf16(newname.m_path);
+  const int rc = _wrename(from.c_str(), to.c_str());
+#else
+#error Unsupported system.
+#endif
+
+  // Both variants return zero on success and set errno otherwise.
+  if (rc != 0)
+    throw Pathie::ErrnoError(errno);
+}
+
+///@}
+
+/** \name Operators
+ *
+ * C++ operators.
+ */
+///@{
+
+Path& Path::operator=(const Path& path)
+{
+  // Copy the underlying string unless this is a self-assignment.
+  if (this != &path)
+    m_path = path.m_path;
+
+  return *this;
+}
+
+Path& Path::operator=(const std::string& str)
+{
+  // The string is taken over as the new underlying path as-is.
+  this->m_path = str;
+
+  return *this;
+}
+
+/**
+ * Compares two Path instances. Two paths are considered equal
+ * if their underlying path std::strings are equal.
+ */
+bool Path::operator==(const Path& other) const
+{
+  // Equal exactly when the underlying strings compare as equal.
+  return m_path.compare(other.m_path) == 0;
+}
+
+/**
+ * Compares two Path instances. Two paths are considered inequal
+ * if their underlying path std::strings are inequal.
+ */
+bool Path::operator!=(const Path& other) const
+{
+  // Unequal exactly when the underlying strings differ.
+  return m_path.compare(other.m_path) != 0;
+}
+
+/**
+ * Compares two Path instances. The referenced path is
+ * considered smaller than `other` if the underlying path
+ * std::string of the referenced path is smaller than the
+ * one of `other`.
+ */
+bool Path::operator<(const Path& other) const
+{
+  // Lexicographic string comparison of the underlying paths.
+  return m_path.compare(other.m_path) < 0;
+}
+
+/**
+ * Compares two Path instances. The referenced path is
+ * considered greater than `other` if the underlying path
+ * std::string of the referenced path is greater than the
+ * one of `other`.
+ */
+bool Path::operator>(const Path& other) const
+{
+  // Lexicographic string comparison of the underlying paths.
+  return m_path.compare(other.m_path) > 0;
+}
+
+/**
+ * Compares two Path instances. The referenced path is
+ * considered smaller than or equal to `other` if the underlying path
+ * std::string of the referenced path is smaller than or equal to the
+ * one of `other`.
+ */
+bool Path::operator<=(const Path& other) const
+{
+  // Lexicographic string comparison of the underlying paths.
+  return m_path.compare(other.m_path) <= 0;
+}
+
+/**
+ * Compares two Path instances. The referenced path is
+ * considered greater than or equal to `other` if the underlying path
+ * std::string of the referenced path is greater than or equal to the
+ * one of `other`.
+ */
+bool Path::operator>=(const Path& other) const
+{
+  // Lexicographic string comparison of the underlying paths.
+  return m_path.compare(other.m_path) >= 0;
+}
+
+/**
+ * This method allows you to access a specific component in the
+ * path string. The first component has the index 0; for an
+ * absolute path, it will be the / entry.
+ *
+ * If you specify an index that is beyond the end of the path,
+ * an std::out_of_range exception will be thrown.
+ *
+ * \param index Index of the component to retrieve.
+ *
+ * \see component_count()
+ *
+ * \remark This operator loops over the path string internally
+ * each time you request an element. If you want to index the
+ * path consecutively, you might consider using burst(), which
+ * can be more performant as it only loops once over the path
+ * string.
+ */
+Path Path::operator[](size_t index) const
+{
+  // The leading slash of an absolute path counts as component 0.
+  if (index == 0 && m_path[0] == '/')
+    return Path("/");
+
+  size_t start   = 0; // Offset of the first character of the current component
+  size_t current = 0; // Index of the current component
+
+  // Step from separator to separator; the text between `start` and the
+  // separator found is the component with index `current`.
+  for (size_t sep = m_path.find("/", start);
+       sep != std::string::npos;
+       sep = m_path.find("/", start)) {
+    if (current == index)
+      return Path(m_path.substr(start, sep - start));
+
+    start = sep + 1;
+    current++;
+  }
+
+  // No separator left: whatever remains is the last component.
+  if (current == index)
+    return Path(m_path.substr(start));
+
+  // Out of range
+  throw(std::out_of_range("Index out of path range"));
+}
+
+/**
+ * Appends a /, then the new component, then calls expand(), and
+ * finally returns a new Path instance.
+ *
+ * \param path New component.
+ *
+ * \returns New Path instance.
+ */
+Path Path::operator/(Path path) const
+{
+  // Plain forwarding to join(); the receiver is left unmodified.
+  Path combined = join(path);
+  return combined;
+}
+
+/**
+ * Appends a /, then the new component, and
+ * finally returns a new Path instance.
+ *
+ * \param str New component.
+ *
+ * \returns New Path instance.
+ */
+Path Path::operator/(std::string str) const
+{
+  // Plain forwarding to join(); the receiver is left unmodified.
+  Path combined = join(str);
+  return combined;
+}
+
+/**
+ * Appends a / followed by the new component `path` onto this
+ * instance and returns this instance.
+ *
+ * \param path New component.
+ *
+ * \returns The receiver.
+ */
+Path& Path::operator/=(Path path)
+{
+  // Replace the receiver with the joined path and hand it back.
+  return *this = join(path);
+}
+
+/**
+ * Appends a / followed by the new component `path` onto this
+ * instance and returns this instance.
+ *
+ * \param str New component.
+ *
+ * \returns The receiver.
+ */
+Path& Path::operator/=(std::string str)
+{
+  // Replace the receiver with the joined path and hand it back.
+  return *this = join(str);
+}
+
+/**
+ * Allows you to insert Pathie::Path instances into `std::cout`.
+ *
+ * ~~~~~~~~~~ c++
+ * Pathie::Path p("foo/bar");
+ * std::cout << p << std::endl;
+ * ~~~~~~~~~~
+ */
+std::ostream& operator<<(std::ostream& stream, const Path& p)
+{
+  // Write the path's string form, then return the stream for chaining.
+  stream << p.str();
+  return stream;
+}
+
+///@}
+
+#ifdef _PATHIE_UNIX
+/*
+ * Returns the XDG directory for the given environment variable,
+ * if defined, otherwise returns home() with `defaultpath`
+ * appended.
+ *
+ * See http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+ * for values.
+ */
+Path Path::get_xdg_dir(const std::string& envvarname, const std::string& defaultpath)
+{
+  std::string env_nstr = utf8_to_filename(envvarname); // environment is encoded the same as the filenames
+  const char* env_value = getenv(env_nstr.c_str());
+
+  // The XDG base directory specification treats a variable that is set
+  // but empty the same as an unset one, so fall through to the default
+  // in both cases (this also matches get_xdg_dirlist()).
+  if (env_value && env_value[0] != '\0')
+    return Path(filename_to_utf8(env_value));
+
+  // Default: `defaultpath` relative to the user's home directory.
+  return Path::home().join(defaultpath);
+}
+
+/*
+ * Returns the list of directories held by the given XDG environment
+ * variable, whose value is a colon-separated list of paths. If the
+ * variable is unset or empty, `defaultlist` (same colon-separated
+ * format) is split instead.
+ *
+ * The result always contains at least one element: a value without
+ * any colon yields a single-element list.
+ */
+std::vector<Path> Path::get_xdg_dirlist(const std::string& envvarname, const std::string& defaultlist)
+{
+  std::string env_nstr = utf8_to_filename(envvarname); // environment is encoded the same as the filenames
+  const char* env_value = getenv(env_nstr.c_str());
+
+  std::string envstr;
+  if (env_value && env_value[0] != '\0')
+    envstr = filename_to_utf8(env_value); // Encode entire env string to UTF-8
+  else
+    envstr = defaultlist;
+
+  std::vector<Path> results;
+  std::string::size_type lastpos = 0; // Start of the element currently being cut out
+  std::string::size_type pos     = 0; // Position of the separator that ends it
+
+  // Continue each search right after the previous separator. Searching
+  // from the start again would find the same colon forever.
+  while ((pos = envstr.find(':', lastpos)) != std::string::npos) {
+    // substr() takes a length, not an end position.
+    results.push_back(Path(envstr.substr(lastpos, pos - lastpos))); // envstr is already UTF-8
+
+    lastpos = pos + 1;
+  }
+
+  // Whatever follows the last separator (or the entire string if there
+  // was none) is the final element.
+  results.push_back(Path(envstr.substr(lastpos)));
+
+  return results;
+}
+
+/*
+ * Looks up a user directory in the `user-dirs.dirs` file below
+ * config_dir() and returns its value with the first occurrence of
+ * `$HOME` replaced by the value of the HOME environment variable.
+ *
+ * `setting` is the part of the key following the "XDG_" prefix, e.g.
+ * "DOWNLOAD" for a line like »XDG_DOWNLOAD_DIR="$HOME/Downloads"«.
+ *
+ * If the file does not exist, the home directory is returned.
+ *
+ * Throws std::runtime_error if the setting is not in the file, if the
+ * matching line has no opening quote, or if `$HOME` is referenced
+ * while HOME is not set in the environment.
+ */
+std::string Path::get_xdg_userdir_setting(const std::string& setting)
+{
+  // XDG user-dirs spec recommends (only) checking for $XDG_CONFIG_HOME/user-dirs.dirs,
+  // the files under $XDG_CONFIG_DIRS are not to be considered.
+  Path userconfig = Path::config_dir().join("user-dirs.dirs");
+
+  // No XDG configuration on this system, use $HOME.
+  if (!userconfig.is_file())
+    return Path::home().str();
+
+  FILE* p_file = userconfig.fopen("r");
+
+  char line[256];
+  std::string entry;
+  bool found = false;
+
+  // Let fgets() drive the loop: it returns NULL on end-of-file and on
+  // read errors alike, so stale buffer contents are never evaluated.
+  while (fgets(line, sizeof(line), p_file)) {
+    // Ignore comments and empty lines
+    if (line[0] == '#' || line[0] == '\n')
+      continue;
+
+    // The setting name starts after the 4 characters of "XDG_". Doing
+    // the comparison on std::string avoids copying into a fixed-size
+    // buffer whose length the caller could exceed.
+    entry = line;
+    if (entry.length() >= 4 && entry.compare(4, setting.length(), setting) == 0) {
+      found = true;
+      break;
+    }
+  }
+
+  fclose(p_file);
+
+  // Error out if not found
+  if (!found) {
+    std::string msg = "Unknown XDG directory '";
+    msg += setting + "' requested.";
+    throw(std::runtime_error(msg));
+  }
+
+  // OK, we have found the correct setting. Extract the value now.
+  // »XDG_DOWNLOAD_DIR="$HOME/Downloads"«
+  const std::string::size_type open_quote = entry.find('"');
+  if (open_quote == std::string::npos) // Malformed
+    throw(std::runtime_error("Malformed XDG config file (quote mismatch/missing quotes)!"));
+
+  // The value runs from behind the opening quote up to the closing
+  // quote; if the closing quote is missing, it runs to the end of the
+  // line that was read.
+  const std::string::size_type value_start = open_quote + 1; // Exclude " itself
+  const std::string::size_type close_quote = entry.find('"', value_start);
+
+  std::string value;
+  if (close_quote == std::string::npos)
+    value = entry.substr(value_start);
+  else
+    value = entry.substr(value_start, close_quote - value_start);
+
+  // Replace $HOME with env value of $HOME (first occurrence only)
+  const std::string::size_type home_pos = value.find("$HOME");
+  if (home_pos != std::string::npos) {
+    const char* homestr = getenv("HOME");
+    if (!homestr)
+      throw(std::runtime_error("$HOME not set!"));
+
+    // "$HOME" is exactly 5 chars long
+    value.replace(home_pos, 5, homestr);
+  }
+
+  return value;
+}
+#endif
+
+/** \name Program data directories
+ *
+ * Directories containing program data other than files the
+ * user works with (e.g. configuration files).
+ */
+///@{
+
+/**
+ * Returns the directory for application- and user-specific permanent
+ * data.
+ *
+ * On UNIX, this returns $XDG_DATA_HOME, defaulting to ~/.local/share.
+ *
+ * On Windows, this returns the roaming appdata folder, which defaults
+ * to `C:/Users/username/AppData/Roaming`.
+ */
+Path Path::data_dir()
+{
+#if defined(_PATHIE_UNIX)
+  // $XDG_DATA_HOME, with a default below the home directory.
+  const std::string variable("XDG_DATA_HOME");
+  const std::string fallback(".local/share");
+  return get_xdg_dir(variable, fallback);
+#elif defined(_WIN32)
+  // Ask the shell for the current location of the roaming appdata folder.
+  wchar_t folder[MAX_PATH];
+  const HRESULT status = SHGetFolderPathW(NULL, CSIDL_APPDATA, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (status != S_OK)
+    throw(Pathie::WindowsHresultError(status));
+
+  return Path(utf16_to_utf8(folder));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \warning This method may behave unexpectedly on Windows; see below.
+ *
+ * Returns the directory for application- and user-specific configuration
+ * files.
+ *
+ * On UNIX, this returns $XDG_CONFIG_HOME, defaulting to ~/.config.
+ *
+ * Windows does not have a notion of a directory for configuration
+ * files, hence some return value for this method had to be chosen. I
+ * think it is best to not clutter a user’s home directory with config
+ * files, and [this stackoverflow thread](https://stackoverflow.com/questions/2243895/location-to-put-user-configuration-files-in-windows)
+ * suggests to place the files in the data_dir(). That however yields
+ * the problem of possible name clashes when you want to name a file
+ * the same in data_dir() and config_dir(). It is not an option to
+ * fall back to the "LocalSettings" directory instead, because 99% of
+ * the applications written are "roaming" applications rather than
+ * "local" ones, and any use of the "LocalSettings" directory
+ * (available via cache_dir()) must be a specific decision of the
+ * programmer therefore. The decision was made that this method on
+ * Windows should return the same as data_dir() without a specific
+ * enforcing reason, but, as said, some decision needed to be
+ * made. As a consequence, you have to be careful to not accidentally
+ * place equally named files in data_dir() and config_dir() as they
+ * would conflict.
+ *
+ * I want to point out that on Windows, configuration files are rather
+ * unusual. The normal way to save configuration settings on Windows
+ * is use of the Windows Registry, which is beyond the scope of a
+ * path manipulation library like Pathie.
+ */
+Path Path::config_dir()
+{
+#if defined(_PATHIE_UNIX)
+  // $XDG_CONFIG_HOME, with a default below the home directory.
+  const std::string variable("XDG_CONFIG_HOME");
+  const std::string fallback(".config");
+  return get_xdg_dir(variable, fallback);
+#elif defined(_WIN32)
+  // Deliberately the same directory as data_dir() on Windows.
+  return data_dir();
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * Returns the directory for application- and user-specific cache files, i.e.
+ * files that, when deleted, do not impact the application apart from resetting
+ * it to some default values. A typical example for cache data is saving the
+ * folder where the user last opened a file, so that when he starts the application
+ * the next time and wants to open a file, is directly taken to the directory
+ * where he last picked a file from. Positions of windows could also be saved
+ * here, allowing application windows to be placed exactly where they were
+ * when the application was closed last time. In short, store the unimportant
+ * stuff here and be prepared the data is gone on application startup.
+ *
+ * On UNIX, this returns $XDG_CACHE_HOME, defaulting to ~/.cache.
+ *
+ * On Windows, this method returns the LOCAL_APPDATA folder, which means that
+ * in corporate setups using Windows roaming the data will not be available
+ * if you log in on another machine (which is expected, cf. the directory
+ * saving example above, which would break if this was saved into the roaming
+ * folder). This defaults to `C:/Users/username/AppData/Local`.
+ */
+Path Path::cache_dir()
+{
+#if defined(_PATHIE_UNIX)
+  // $XDG_CACHE_HOME, with a default below the home directory.
+  const std::string variable("XDG_CACHE_HOME");
+  const std::string fallback(".cache");
+  return get_xdg_dir(variable, fallback);
+#elif defined(_WIN32)
+  // Ask the shell for the current location of the local appdata folder.
+  wchar_t folder[MAX_PATH];
+  const HRESULT status = SHGetFolderPathW(NULL, CSIDL_LOCAL_APPDATA, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (status != S_OK)
+    throw(Pathie::WindowsHresultError(status));
+
+  return Path(utf16_to_utf8(folder));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * Returns the directory for application- and user-specific volatile
+ * runtime data, i.e. data that WILL be deleted once the user logs
+ * off.
+ *
+ * On UNIX, this returns $XDG_RUNTIME_DIR. That environment variable is
+ * required to be defined as per the XDG standard, and if it isn’t, this
+ * method prints a warning to the standard error stream and uses the
+ * value of Path::temp_dir() instead.
+ *
+ * On Windows, the return value of this method is equivalent to that
+ * of temp_dir() always.
+ */
+Path Path::runtime_dir()
+{
+#if defined(_PATHIE_UNIX)
+  const std::string variable = utf8_to_filename("XDG_RUNTIME_DIR"); // environment is encoded the same as paths
+  const char* configured = getenv(variable.c_str());
+
+  if (!configured) {
+    // The variable is missing: warn on stderr and use the temporary
+    // directory instead.
+    Path fallback = Path::temp_dir();
+    std::cerr << "(pathie XDG) WARNING: XDG_RUNTIME_DIR not defined in environment. Falling back to '" << fallback.str() << "'." << std::endl;
+
+    return fallback;
+  }
+
+  return Path(filename_to_utf8(configured));
+#elif defined(_WIN32)
+  return temp_dir();
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * Returns the root directory for temporary directories, i.e.
+ * directories which are expected to vanish when the application
+ * closes. Do not assume that anything you created in this
+ * directory still exists after your application exited and is
+ * restarted.
+ *
+ * \returns Path instance for temporary directory.
+ *
+ * \remark On UNIX, this function honours the value of the
+ * environment variable $TMPDIR. If that is not defined, the standard
+ * "/tmp" location will be returned. On Windows, GetTempPath() is
+ * called to retrieve the path, which in turn honours the environment
+ * variables $TMP, $TEMP, and $USERPROFILE (in that order); if all
+ * of them are undefined, a Windows-predefined path is returned,
+ * which defaults to `C:/Users/username/AppData/Local/Temp`.
+ *
+ * \see mktmpdir(3), [GetTempPath()](http://msdn.microsoft.com/en-us/library/windows/desktop/aa364992%28v=vs.85%29.aspx)
+ */
+Path Path::temp_dir()
+{
+#if defined(_PATHIE_UNIX)
+  const std::string variable = utf8_to_filename("TMPDIR"); // environment is encoded the same as paths
+  const char* configured = getenv(variable.c_str());
+
+  // $TMPDIR wins if it is present in the environment.
+  if (configured)
+    return Path(filename_to_utf8(configured));
+
+  return Path("/tmp"); // As per the Filesystem Hierarchy Standard.
+#elif defined(_WIN32)
+  wchar_t raw[MAX_PATH + 1]; // See http://msdn.microsoft.com/en-us/library/windows/desktop/aa364992%28v=vs.85%29.aspx for the +1
+  const DWORD length = GetTempPathW(MAX_PATH + 1, raw);
+
+  // A return value of zero signals failure.
+  if (length == 0)
+    throw(Pathie::WindowsError(GetLastError()));
+
+  const std::wstring wide(raw, length);
+  return utf16_to_utf8(wide);
+#else
+#error Unsupported system.
+#endif
+}
+
+///@}
+
+/**
+ * Create a temporary directory (with permissions set to
+ * 0700 on UNIX). The directory is guaranteed to be empty, and
+ * it is your responsibility to recursively remove the
+ * directory on program exit (or earlier).
+ *
+ * \param[in] name (`"tmpd"`) This will be used as part of
+ * the name of the directory, _not_ as the entire name.
+ *
+ * \returns Path instance for the new temporary directory.
+ *
+ * \remark Parts of the random name are generated with the
+ * C rand() function, so you might want to call srand()
+ * before using this function in order to seed the random
+ * number generator with a useful value.
+ */
+Path Path::mktmpdir(const std::string& name /* = "tmpd" */)
+{
+  // Compose <temp_dir>/<generated name> and create it.
+  Path tmpdir = Path::temp_dir() / Path(make_tempname(name));
+  tmpdir.mkdir();
+
+#ifdef _PATHIE_UNIX
+  // Restrict access to the owner. A failing chmod() is ignored on purpose.
+  chmod(tmpdir.native().c_str(), S_IRWXU);
+#endif
+  // TODO: How to do that on Windows?
+
+  return tmpdir;
+}
+
+// Constructs a filename that tries to be unique. The result is
+// namepart + "_" + timestamp (YYYYmmdd-HHMMSS) + process ID +
+// 15 random lowercase letters. The letters are drawn with rand(),
+// so the caller is responsible for seeding it.
+std::string Path::make_tempname(const std::string& namepart)
+{
+  // Timestamp part (15 characters).
+  time_t now;
+  time(&now);
+  struct tm* p_nowinfo = localtime(&now);
+
+  // localtime() may return NULL, which must never be handed to
+  // strftime(). Fall back to an all-zero timestamp of the same
+  // length if the time cannot be broken down or formatted.
+  std::string timepart("00000000-000000");
+  if (p_nowinfo) {
+    char buf[16]; // 15 + NUL
+    if (strftime(buf, sizeof(buf), "%Y%m%d-%H%M%S", p_nowinfo) > 0)
+      timepart.assign(buf);
+  }
+
+  // Process ID part.
+#if defined(_PATHIE_UNIX)
+  std::stringstream ss;
+  ss << getpid();
+  std::string pidpart = ss.str();
+#elif defined(_WIN32)
+  std::stringstream ss;
+  ss << GetCurrentProcessId();
+  std::string pidpart = ss.str();
+#else
+#error Unsupported system.
+#endif
+
+  // Random part: exactly 15 characters between a and z. Only as many
+  // characters as are actually used get generated.
+  std::string randompart(15, 'a');
+  for (std::string::size_type i = 0; i < randompart.length(); i++)
+    randompart[i] = static_cast<char>(97 + rand() % 26);
+
+  return namepart + "_" + timepart + pidpart + randompart;
+}
+
+#if defined(_PATHIE_UNIX)
+/**
+ * \note Only available on UNIX. Accesses the file system.
+ *
+ * Returns $XDG_DATA_DIRS as per the XDG specification.
+ * If that is not set, returns a vector of paths for
+ * /usr/local/share and /usr/share.
+ */
+std::vector<Path> Path::data_dirs()
+{
+  // Second argument is the list handed to get_xdg_dirlist() as default.
+  std::vector<Path> dirs = get_xdg_dirlist("XDG_DATA_DIRS", "/usr/local/share/:/usr/share/");
+  return dirs;
+}
+
+/**
+ * \note Only available on UNIX. Accesses the file system.
+ *
+ * Returns $XDG_CONFIG_DIRS as per the XDG specification.
+ * If that is not set, returns a vector of paths for
+ * /etc/xdg (i.e. a one-element vector).
+ */
+std::vector<Path> Path::config_dirs()
+{
+  // Second argument is the list handed to get_xdg_dirlist() as default.
+  std::vector<Path> dirs = get_xdg_dirlist("XDG_CONFIG_DIRS", "/etc/xdg");
+  return dirs;
+}
+#endif
+
+/** \name User data directories
+ *
+ * Directories that contain user data like music or text files
+ * the user works with.
+ */
+///@{
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the directory of the user’s desktop. Generally,
+ * any files placed in this directory will appear on the
+ * user’s desktop view (the area shown when no windows
+ * are open).
+ *
+ * On UNIX, this is $XDG_DESKTOP_DIR, defaulting to `~/Desktop`.
+ * Note you likely will receive a localised version (like “Schreibtisch”
+ * on a German Linux).
+ *
+ * On Windows, the default is `C:/Users/username/Desktop` or a localised
+ * version.
+ */
+Path Path::desktop_dir()
+{
+#if defined(_PATHIE_UNIX)
+  return Path(get_xdg_userdir_setting("DESKTOP"));
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_DESKTOPDIRECTORY, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the directory for the user’s documents. This is
+ * not the place for your data files, savegames, or configuration
+ * files -- it is meant only for textual and other documents you can
+ * access with an office or similar program. See data_dir() for a directory
+ * you can store your data into.
+ *
+ * On UNIX, this is $XDG_DOCUMENTS_DIR, defaulting to `~/Documents`.
+ * Note you likely will receive a localised version (like "Dokumente"
+ * on a German Linux).
+ *
+ * On Windows, the default is `C:/Users/username/Documents` or a localised
+ * version.
+ */
+Path Path::documents_dir()
+{
+#if defined(_PATHIE_UNIX)
+  return Path(get_xdg_userdir_setting("DOCUMENTS"));
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_PERSONAL, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the user’s download directory. Unfortunately, this function
+ * is currently unsupported under Windows, because MinGW has not yet
+ * adapted the necessary win32api changes.
+ *
+ * On UNIX, this is $XDG_DOWNLOAD_DIR, defaulting to `~/Downloads`.
+ * Note you likely will receive a localised version.
+ */
+Path Path::download_dir()
+{
+#if defined(_PATHIE_UNIX)
+  Path downloads(get_xdg_userdir_setting("DOWNLOAD"));
+  return downloads;
+#elif defined(_WIN32)
+  // The download folder is only reachable through the newer
+  // KNOWNFOLDERID system (not via CSIDL), and MinGW does not
+  // support that yet.
+  throw std::runtime_error("KNOWNFOLDERID is not supported by MinGW yet, can't retrieve this directory.");
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the user’s music directory.
+ *
+ * On UNIX, this is $XDG_MUSIC_DIR, defaulting to `~/Music`.
+ * Note you likely will receive a localised version (like "Musik"
+ * on a German Linux).
+ *
+ * On Windows, this defaults to `C:/users/username/Music` or a localised
+ * version.
+ */
+Path Path::music_dir()
+{
+#if defined(_PATHIE_UNIX)
+  return Path(get_xdg_userdir_setting("MUSIC"));
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_MYMUSIC, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the user’s pictures directory.
+ *
+ * On UNIX, this is $XDG_PICTURES_DIR, defaulting to `~/Pictures`.
+ * Note you likely will receive a localised version (like "Bilder"
+ * on a German Linux).
+ *
+ * On Windows, this defaults to `C:/users/username/Pictures` or a
+ * localised version.
+ */
+Path Path::pictures_dir()
+{
+#if defined(_PATHIE_UNIX)
+  return Path(get_xdg_userdir_setting("PICTURES"));
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_MYPICTURES, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the user’s publicshare directory. This directory may
+ * be exposed to network access on the local network, though this
+ * is not required.
+ *
+ * On UNIX, this is $XDG_PUBLICSHARE_DIR, defaulting to `~/Public`.
+ * Note you likely will receive a localised version (like "Öffentlich"
+ * on a German Linux).
+ *
+ * On Windows, this defaults to `C:/users/username/AppData/Roaming/Microsoft/Windows/Network Shortcuts`.
+ */
+Path Path::publicshare_dir()
+{
+#if defined(_PATHIE_UNIX)
+  return Path(get_xdg_userdir_setting("PUBLICSHARE"));
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_NETHOOD, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the user’s directory for document templates. The files
+ * in this directory can generally be accessed by right-clicking
+ * in the user’s favourite file manager and selecting "new" followed
+ * by the desired file. The file will then be copied from the templates
+ * directory into the directory the user works in at the moment.
+ *
+ * On UNIX, this is $XDG_TEMPLATES_DIR, defaulting to `~/Templates`.
+ * Note you likely will receive a localised version (like "Vorlagen"
+ * on a German Linux).
+ *
+ * On Windows, this defaults to `C:/users/username/AppData/Roaming/Microsoft/Windows/Templates`.
+ */
+Path Path::templates_dir()
+{
+#if defined(_PATHIE_UNIX)
+  return Path(get_xdg_userdir_setting("TEMPLATES"));
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_TEMPLATES, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the user’s directory for videos.
+ *
+ * On UNIX, this is $XDG_VIDEOS_DIR, defaulting to `~/Videos`
+ * or a localised version.
+ *
+ * On Windows, this defaults to `C:/users/username/Videos` or a
+ * localised version.
+ */
+Path Path::videos_dir()
+{
+#if defined(_PATHIE_UNIX)
+  return Path(get_xdg_userdir_setting("VIDEOS"));
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_MYVIDEO, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the file system.
+ *
+ * Retrieves the user’s path for application starters. On UNIX,
+ * this will return a directory (typically `~/.local/share/applications`)
+ * where you can store XDG `.desktop` files in so they get picked up
+ * by the desktop environment’s application menu for that user. On Windows,
+ * the user’s startmenu folder is returned, and any files and directories
+ * you add there will show up in the user’s startmenu.
+ *
+ * \remark On Windows, this is not the global startmenu folder, but the
+ * user’s specific ones. Other users will not have the entries you put
+ * here in their startmenu.
+ */
+Path Path::appentries_dir()
+{
+#if defined(_PATHIE_UNIX)
+  // The "applications" directory below the user's data directory.
+  Path base = data_dir();
+  return base.join("applications");
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_STARTMENU, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+///@}
+
+/** \name Global data directories
+ *
+ * Directories that contain data either unrelated to users at all,
+ * or applicable to all users at once. Be careful to read the
+ * Windows notes in the documentation of these methods, as Windows
+ * only supplies a much smaller set of system directories than UNIX.
+ */
+///@{
+
+/**
+ * Retrieves the global directory for application starters. On UNIX,
+ * any XDG `.desktop` files you place there should show up in any user’s
+ * desktop environment’s menu, and on Windows, anything you place there
+ * should show up in any user’s startmenu.
+ *
+ * \param local (true) If true, this method returns the location
+ * under the `/usr/local` hierarchy, otherwise it returns the
+ * location under the `/usr` hierarchy. This parameter has no meaning
+ * on Windows and is ignored.
+ */
+Path Path::global_appentries_dir(localpathtype local)
+{
+#if defined(_PATHIE_UNIX)
+  // An explicit LOCALPATH_LOCAL wins; for LOCALPATH_DEFAULT the
+  // decision is deferred to get_global_dir_default().
+  const bool use_local = local == Path::LOCALPATH_LOCAL
+    || (local == Path::LOCALPATH_DEFAULT && get_global_dir_default() == Path::LOCALPATH_LOCAL);
+
+  if (use_local)
+    return Path("/usr/local/share/applications");
+
+  return Path("/usr/share/applications");
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_COMMON_STARTMENU, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * Retrieves the directory for immutable application data that isn’t user-specific,
+ * i.e. which shall be available to all users using the system.
+ *
+ * On UNIX, this is `/usr/share`. On Windows, this is `C:/Windows/system32`.
+ * On Windows, beware conflicts with files of the same name in
+ * global_config_dir()!
+ *
+ * \param local (true) If true, this method returns the location
+ * under the `/usr/local` hierarchy, otherwise it returns the
+ * location under the `/usr` hierarchy. This parameter has no meaning
+ * under Windows and is ignored.
+ */
+Path Path::global_immutable_data_dir(localpathtype local)
+{
+#if defined(_PATHIE_UNIX)
+  // An explicit LOCALPATH_LOCAL wins; for LOCALPATH_DEFAULT the
+  // decision is deferred to get_global_dir_default().
+  const bool use_local = local == Path::LOCALPATH_LOCAL
+    || (local == Path::LOCALPATH_DEFAULT && get_global_dir_default() == Path::LOCALPATH_LOCAL);
+
+  if (use_local)
+    return Path("/usr/local/share");
+
+  return Path("/usr/share");
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_SYSTEM, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * Retrieves the directory for mutable application data that isn’t user-specific,
+ * i.e. which shall be available to all users using the system.
+ *
+ * On UNIX, this is `/var/lib`. On Windows, this is the Application Data folder
+ * for the "All Users" account. On Windows, this is equivalent to global_cache_dir(),
+ * so beware file name conflicts on Windows!
+ *
+ * \param local (true) If true, this method returns the location
+ * under the `/var/local` hierarchy, otherwise it returns the
+ * location under the `/var` hierarchy. This parameter has no meaning
+ * under Windows and is ignored.
+ */
+Path Path::global_mutable_data_dir(localpathtype local)
+{
+#if defined(_PATHIE_UNIX)
+  // An explicit LOCALPATH_LOCAL wins; for LOCALPATH_DEFAULT the
+  // decision is deferred to get_global_dir_default().
+  const bool use_local = local == Path::LOCALPATH_LOCAL
+    || (local == Path::LOCALPATH_DEFAULT && get_global_dir_default() == Path::LOCALPATH_LOCAL);
+
+  if (use_local)
+    return Path("/var/local/lib");
+
+  return Path("/var/lib");
+#elif defined (_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_COMMON_APPDATA, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system
+#endif
+}
+
+/**
+ * Retrieves the directory for global cache data, i.e. data, which
+ * is not essential to the program and can be reconstructed if it
+ * gets lost.
+ *
+ * On UNIX, this returns `/var/cache`. Windows does not have a notion
+ * of such a directory, hence the value is equal to the return value
+ * of global_mutable_data_dir(). Therefore: On Windows, beware conflicts if you
+ * use files of the same name in global_mutable_data_dir() and
+ * global_cache_dir()!
+ *
+ * \param local (true) If true, returns the cache directory for locally installed
+ * programs, which is `/var/local/cache`. This parameter has no effect under
+ * systems other than UNIX.
+ */
+Path Path::global_cache_dir(localpathtype local)
+{
+#if defined(_PATHIE_UNIX)
+  // An explicit LOCALPATH_LOCAL wins; for LOCALPATH_DEFAULT the
+  // decision is deferred to get_global_dir_default().
+  const bool use_local = local == Path::LOCALPATH_LOCAL
+    || (local == Path::LOCALPATH_DEFAULT && get_global_dir_default() == Path::LOCALPATH_LOCAL);
+
+  if (use_local)
+    return Path("/var/local/cache");
+
+  return Path("/var/cache");
+#elif defined(_WIN32)
+  // No dedicated cache location here; reuse the mutable data directory.
+  return global_mutable_data_dir();
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note On UNIX, this method accesses the filesystem.
+ *
+ * Returns the directory for volatile information that will be deleted
+ * on system shutdown.
+ *
+ * On UNIX, this returns `/run` if it exists, otherwise `/var/run`.
+ * Windows does not have a notion of such a directory; as a replacement,
+ * `C:/Temp` is returned.
+ *
+ * \param local (true) If true, returns the equivalent directory for
+ * `/run` for locally installed programs, which is `/var/local/run`. This
+ * parameter has no effect on systems other than UNIX.
+ */
+Path Path::global_runtime_dir(localpathtype local)
+{
+#if defined(_PATHIE_UNIX)
+  // An explicit LOCALPATH_LOCAL wins; for LOCALPATH_DEFAULT the
+  // decision is deferred to get_global_dir_default().
+  const bool use_local = local == Path::LOCALPATH_LOCAL
+    || (local == Path::LOCALPATH_DEFAULT && get_global_dir_default() == Path::LOCALPATH_LOCAL);
+
+  if (use_local)
+    return Path("/var/local/run");
+
+  // Prefer /run when it exists, otherwise use /var/run.
+  Path modern_run("/run");
+  if (!modern_run.exists())
+    return Path("/var/run");
+
+  return modern_run;
+#elif defined(_WIN32)
+  return Path("C:/Temp");
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * Returns the global directory for configuration files.
+ *
+ * On UNIX, this is `/etc`. Windows does not really have a notion
+ * for configuration directories. This method returns the Windows
+ * system folder for that purpose, typically `C:/Windows/system32`;
+ * this is equivalent to global_immutable_data_dir(), so be careful
+ * when you place files of the same name in global_config_dir()!
+ *
+ * \param local (true) If true, returns the global configuration
+ * directory for locally installed programs instead, which is
+ * `/usr/local/etc`.
+ */
+Path Path::global_config_dir(localpathtype local)
+{
+#if defined(_PATHIE_UNIX)
+  // An explicit LOCALPATH_LOCAL wins; for LOCALPATH_DEFAULT the
+  // decision is deferred to get_global_dir_default().
+  const bool use_local = local == Path::LOCALPATH_LOCAL
+    || (local == Path::LOCALPATH_DEFAULT && get_global_dir_default() == Path::LOCALPATH_LOCAL);
+
+  if (use_local)
+    return Path("/usr/local/etc");
+
+  return Path("/etc");
+
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_SYSTEM, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * Retrieves the global directory for self-contained applications, i.e.
+ * applications that require a directory structure different from the
+ * Filesystem Hierarchy Standard (FHS). Such programs are an exception
+ * under UNIX, but are the regular case on Windows. The programs placed
+ * in this directory are intended to be available to all users using the
+ * system.
+ *
+ * Under UNIX, this method returns the `/opt` directory. On Windows,
+ * it returns the Program Files directory (typically `C:\Program Files`).
+ *
+ * \note On UNIX, the FHS mandates that programs installed under
+ * `/opt` do not use the usual directories for variable information
+ * returned by global_mutable_data_dir() and global_cache_dir(), but
+ * instead use `/var/opt`.
+ */
+Path Path::global_programs_dir()
+{
+#if defined(_PATHIE_UNIX)
+  Path opt("/opt");
+  return opt;
+#elif defined(_WIN32)
+  // Ask the shell for the folder; anything but S_OK is an error.
+  wchar_t folder[MAX_PATH];
+  const HRESULT hr = SHGetFolderPathW(NULL, CSIDL_PROGRAM_FILES, NULL, SHGFP_TYPE_CURRENT, folder);
+  if (hr == S_OK)
+    return Path(utf16_to_utf8(folder));
+
+  throw(Pathie::WindowsHresultError(hr));
+
+#else
+#error Unsupported system.
+#endif
+}
+
+///@}
+
+/** \name Miscellaneous static functions
+ *
+ * Other functions that didn’t fit somewhere else.
+ */
+///@{
+
+/// \note This method accesses the filesystem.
+///
+/// Uses a shell-like glob pattern on the current working directory.
+/// Typically available patterns include "*" for a string of
+/// arbitrary length and "?" for a string of length one.
+///
+/// Refer to glob(7) for glob patterns available on UNIX.
+/// Refer to [MSDN](http://msdn.microsoft.com/en-us/library/windows/desktop/aa364418%28v=vs.85%29.aspx)
+/// for glob patterns available on Windows.
+///
+/// Windows does not support recursive patterns like
+///
+/// \verbatim **/* \endverbatim
+///
+/// or
+///
+/// \verbatim foo/*/bar \endverbatim
+///
+/// . This will result in a Pathie::WindowsError exception
+/// with Windows error code 123 (“invalid filename”). For cross-platform
+/// recursive matching, you can try to combine find() and fnmatch().
+///
+/// \param[in] pattern Glob pattern.
+/// \param flags (`0`) Globbing flags. Refer to glob(3) for
+/// possible values; the parameter is ignored on Windows.
+///
+/// \returns A vector of Path instances that matched the glob
+/// pattern.
+///
+/// \remark Glob patterns on UNIX are generally much more powerful than
+/// those on Windows. Be careful when using anything apart from "*" and "?"
+/// patterns on Windows.
+///
+/// \see dglob() fnmatch()
+///
+std::vector<Path> Path::glob(const std::string& pattern, int flags /* = 0 */)
+{
+#if defined(_PATHIE_UNIX)
+  std::string nstr = utf8_to_filename(pattern);
+
+  // Zero the structure so that globfree() is safe on every path below
+  // and so that flags which read fields on input (e.g. gl_offs with
+  // GLOB_DOOFFS) never see uninitialised memory.
+  glob_t globinfo;
+  memset(&globinfo, 0, sizeof(globinfo));
+
+  int status = ::glob(nstr.c_str(), flags, NULL, &globinfo);
+
+  if (status != 0) {
+    // glob() may have stored partial results before failing, so
+    // always release them.
+    globfree(&globinfo);
+
+    if (status == GLOB_NOMATCH)
+      return std::vector<Path>(); // Empty vector
+
+    throw(GlobError(status));
+  }
+
+  std::vector<Path> matches;
+  try {
+    for(size_t i=0; i < globinfo.gl_pathc; i++) {
+      matches.push_back(Path(filename_to_utf8(globinfo.gl_pathv[i])));
+    }
+  }
+  catch (...) {
+    // Encoding conversion or allocation failed; don't leak the match list.
+    globfree(&globinfo);
+    throw;
+  }
+
+  globfree(&globinfo);
+  return matches;
+#elif defined(_WIN32)
+  std::vector<Path> results;
+  std::wstring utf16_pattern = utf8_to_utf16(pattern);
+
+  /* Windows’ FindFirstFile()/FindNextFile() returns bare file names.
+   * However, to ensure output similar to the UNIX version, we prepend
+   * the pattern’s stem if a slash / is found in the pattern; FindFirstFile()/
+   * FindNextFile() don’t support recursive matching anyway, so this is safe. */
+  std::string stem;
+  size_t pos = pattern.rfind("/");
+  if (pos != std::string::npos)
+    stem = pattern.substr(0, pos + 1); // Trailing / included
+
+  // Prepare. Size the memset by the object itself: WIN32_FIND_DATA may
+  // name the narrow structure, which differs in size from the wide one.
+  WIN32_FIND_DATAW finddata;
+  memset(&finddata, '\0', sizeof(finddata));
+
+  // Try finding the first file
+  HANDLE filehandle = FindFirstFileW(utf16_pattern.c_str(), &finddata);
+
+  // Check if some error happened
+  if (filehandle == INVALID_HANDLE_VALUE) {
+    DWORD errval = GetLastError();
+    if (errval == ERROR_FILE_NOT_FOUND) // According to docs, this means no matching files were found. Return empty list.
+      return results;
+
+    // Never continue with an invalid handle, whatever the error code says.
+    throw Pathie::WindowsError(errval);
+  }
+
+  // All well: save the first match and continue with the following ones.
+  // The find handle is closed even if a conversion throws.
+  DWORD errval = ERROR_SUCCESS;
+  try {
+    do {
+      results.push_back(Path(stem + utf16_to_utf8(finddata.cFileName)));
+    } while (FindNextFileW(filehandle, &finddata));
+
+    errval = GetLastError(); // Must be read before FindClose()
+  }
+  catch (...) {
+    FindClose(filehandle);
+    throw;
+  }
+
+  FindClose(filehandle);
+
+  if (errval != ERROR_NO_MORE_FILES)
+    throw(Pathie::WindowsError(errval));
+
+  return results;
+#else
+#error Unsupported system.
+#endif
+}
+
+///@}
+
+/** \name Miscellaneous member functions
+ *
+ * Methods that didn’t fit anywhere else.
+ */
+
+///@{
+
+/**
+ * This method tests whether the referenced path matches the
+ * given pattern under the rules of the local glob-matching
+ * function. Note this method does _not_ access the filesystem,
+ * hence there is no guarantee that the referenced path exists.
+ *
+ * \param[in] pattern The pattern to match.
+ * \param flags Any flags. This parameter is ignored on Windows,
+ * for UNIX refer to the fnmatch(3) manpage.
+ *
+ * \returns Whether the path matches the pattern.
+ *
+ * \remark On Windows, this method uses the [PathMatchSpec()](http://msdn.microsoft.com/en-us/library/bb773727%28VS.85%29.aspx)
+ * function; on UNIX, it uses fnmatch(3).
+ *
+ * \remark Windows’s `PathMatchSpec()` function does not support
+ * recursive matching patterns, while the UNIX fnmatch(3), relying
+ * on glob(7), does.
+ *
+ * \remark Glob patterns on UNIX are generally much more powerful than
+ * those on Windows. Be careful when using anything apart from "*" and "?"
+ * patterns on Windows.
+ *
+ * \see glob() dglob()
+ */
+bool Path::fnmatch(const std::string& pattern, int flags /* = 0 */) const
+{
+#if defined(_PATHIE_UNIX)
+  // Both the path and the pattern are handed to fnmatch(3) in native encoding.
+  const std::string native_path = native();
+  const std::string native_pattern = utf8_to_filename(pattern);
+
+  const int status = ::fnmatch(native_pattern.c_str(), native_path.c_str(), flags);
+  return status == 0;
+#elif defined(_WIN32)
+  // `flags' is not used here.
+  const std::wstring wide_path = utf8_to_utf16(m_path);
+  const std::wstring wide_pattern = utf8_to_utf16(pattern);
+
+  return PathMatchSpecW(wide_path.c_str(), wide_pattern.c_str()) != FALSE;
+#else
+#error Unsupported system.
+#endif
+}
+
+/**
+ * \note This method accesses the filesystem.
+ *
+ * Like glob(), but prepends the referenced path to the glob
+ * pattern.
+ *
+ * \see glob() fnmatch()
+ */
+std::vector<Path> Path::dglob(const std::string& pattern, int flags /* = 0 */) const
+{
+  // Anchor the pattern below the referenced path, then delegate to glob().
+  std::string anchored(m_path);
+  anchored += "/";
+  anchored += pattern;
+
+  return glob(anchored, flags);
+}
+
+/**
+ * Appends a /, then the new component, and
+ * finally returns a new Path instance.
+ *
+ * \param path New component.
+ *
+ * \returns New Path instance.
+ */
+Path Path::join(Path path) const
+{
+  // <this path> + "/" + <component>
+  return Path(m_path + "/" + path.str());
+}
+
+/**
+ * Appends a /, then the new component, and
+ * finally returns a new Path instance.
+ *
+ * \param str New component.
+ *
+ * \returns New Path instance.
+ */
+Path Path::join(std::string str) const
+{
+  // <this path> + "/" + <component>
+  return Path(m_path + "/" + str);
+}
+
+/**
+ * Replaces the current extension with the given new extension
+ * and returns the result. If the referenced path doesn’t have
+ * a file extension currently, the new extension is appended.
+ *
+ * \param new_extension The new extension. If the leading point
+ * is missing, it will automatically be prepended.
+ *
+ * \returns The new Path instance.
+ */
+Path Path::sub_ext(std::string new_extension) const
+{
+  // Prepend the point only if the extension does not already start
+  // with one. A point elsewhere in the string (as in "tar.gz") must
+  // not suppress it.
+  if (new_extension.empty() || new_extension[0] != '.')
+    new_extension.insert(0, ".");
+
+  std::string old_extension = extension();
+  if (old_extension.empty())
+    return Path(m_path + new_extension);
+
+  // Search from the back: a forward search would cut the path at an
+  // earlier occurrence of the same text, e.g. at the directory in
+  // "a.txt/b.txt". NOTE(review): this assumes extension() returns the
+  // trailing part of m_path -- confirm against its definition.
+  // If nothing is found, substr(0, npos) keeps the whole path and the
+  // new extension is simply appended.
+  size_t pos = m_path.rfind(old_extension);
+  return Path(m_path.substr(0, pos) + new_extension);
+}
+
+///@}
diff --git a/src/3rd_party/pathie-cpp/src/pathie.cpp b/src/3rd_party/pathie-cpp/src/pathie.cpp
new file mode 100644
index 00000000..9df1f733
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/src/pathie.cpp
@@ -0,0 +1,226 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../include/pathie.hpp"
+#include "../include/errors.hpp"
+
+#if defined(_WIN32)
+#include <windows.h>
+
+/**
+ * Converts a UTF-16LE string into UTF-8. Only available
+ * on Windows.
+ */
+std::string Pathie::utf16_to_utf8(std::wstring str)
+{
+  // WideCharToMultiByte() returns 0 for zero-length input, which
+  // would be mistaken for a failure below. An empty string converts to
+  // an empty string.
+  if (str.empty())
+    return std::string();
+
+  const int length = static_cast<int>(str.length());
+
+  // First pass: ask for the number of bytes required.
+  int size = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), length, NULL, 0, NULL, NULL);
+
+  if (size == 0)
+    throw(Pathie::WindowsError(GetLastError()));
+
+  // Second pass: convert straight into the result string. Using a
+  // std::string as the buffer means nothing leaks when throwing.
+  std::string utf8str(size, '\0');
+  size = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), length, &utf8str[0], size, NULL, NULL);
+
+  if (size == 0)
+    throw(Pathie::WindowsError(GetLastError()));
+
+  utf8str.resize(size);
+  return utf8str;
+}
+
+/**
+ * Converts a UTF-8 string into UTF-16LE. Only available
+ * on Windows.
+ */
+std::wstring Pathie::utf8_to_utf16(std::string str)
+{
+  // MultiByteToWideChar() returns 0 for zero-length input, which
+  // would be mistaken for a failure below. An empty string converts to
+  // an empty string.
+  if (str.empty())
+    return std::wstring();
+
+  const int length = static_cast<int>(str.length());
+
+  // First pass: ask for the number of wide characters required.
+  int count = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, NULL, 0);
+
+  if (count == 0)
+    throw(Pathie::WindowsError(GetLastError()));
+
+  // Second pass: convert straight into the result string. Using a
+  // std::wstring as the buffer means nothing leaks when throwing.
+  std::wstring utf16str(count, L'\0');
+  count = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, &utf16str[0], count);
+
+  if (count == 0)
+    throw(Pathie::WindowsError(GetLastError()));
+
+  utf16str.resize(count);
+  return utf16str;
+}
+#endif
+
+#ifdef _PATHIE_UNIX
+#include <cstring>
+#include <cstdlib>
+#include <errno.h>
+#include <iconv.h>
+#include <langinfo.h>
+#include <sys/param.h> // defines "BSD" macro on BSD systems
+
+/* iconv() function family is available on every POSIX-conformant
+ * system. In POSIX.1-2008, it’s specified in the "System Interfaces"
+ * section.
+ *
+ * nl_langinfo() is also specified by POSIX, though I’ve found no evidence
+ * that iconv() is required to understand the encoding output by nl_langinfo(CODESET).
+ * From checking on Linux and FreeBSD, this however seems very likely, so we have
+ * to assume that this always is the case.
+ */
+
+/**
+ * This function converts the given string from the given source encoding
+ * to another given target encoding and returns the result as a std::string.
+ *
+ * \param[in] from_encoding Convert from this encoding.
+ * \param[in] to_encoding Convert into this encoding.
+ * \param[in] string The string to convert.
+ *
+ * \returns The converted string.
+ *
+ * \remark See the output of the `iconv --list` command for a list of
+ * supported encodings.
+ */
+std::string Pathie::convert_encodings(const char* from_encoding, const char* to_encoding, const std::string& string)
+{
+  // iconv() needs a non-const input pointer, so work on a private copy.
+  // The terminating NUL is included only to keep the copy non-empty; it
+  // is not counted in `inbytes_left' and hence never converted.
+  std::string input(string.c_str(), string.length() + 1);
+  char* inbuf         = &input[0];
+  size_t inbytes_left = string.length();
+
+  /* There is no way to know how much space iconv() will need. So we
+   * start with a guess and grow the buffer whenever iconv() reports
+   * E2BIG. `written' tracks the fill level as an offset (not as a
+   * pointer), so it stays valid when the buffer is reallocated. */
+  std::string output(string.length() + 16, '\0');
+  size_t written = 0;
+
+  // Set up the encoding converter
+  iconv_t converter = iconv_open(to_encoding, from_encoding);
+  if (converter == (iconv_t) -1)
+    throw Pathie::ErrnoError(errno);
+
+  int errsav = 0;
+  try {
+    while(true) {
+      // Continue right behind what has been written so far.
+      char* outbuf         = &output[0] + written;
+      size_t outbytes_left = output.length() - written;
+
+      errno = 0;
+#ifdef BSD
+      // What the heck. FreeBSD violates POSIX.1-2008: it declares iconv()
+      // differently than mandated by POSIX: http://pubs.opengroup.org/onlinepubs/9699919799/functions/iconv.html
+      // (it declares a `const' where it must not be).
+      size_t status = iconv(converter, const_cast<const char**>(&inbuf), &inbytes_left, &outbuf, &outbytes_left);
+#else
+      size_t status = iconv(converter, &inbuf, &inbytes_left, &outbuf, &outbytes_left);
+#endif
+      errsav  = errno;
+      written = output.length() - outbytes_left; // iconv() decreases outbytes_left
+
+      if (status != (size_t) -1) {
+        errsav = 0; // Success; errno is meaningless then
+        break;
+      }
+
+      if (errsav != E2BIG)
+        break; // Real conversion error, reported below
+
+      // Not enough room: enlarge and let iconv() pick up where it stopped.
+      output.resize(output.length() * 2);
+    }
+  }
+  catch (...) {
+    // Growing the buffer failed; don't leak the converter.
+    iconv_close(converter);
+    throw;
+  }
+
+  iconv_close(converter);
+
+  if (errsav != 0)
+    throw(Pathie::ErrnoError(errsav));
+
+  output.resize(written);
+  return output;
+}
+
+/**
+ * Converts the given UTF-8 string into the native filename encoding.
+ */
+std::string Pathie::utf8_to_filename(const std::string& utf8)
+{
+#if defined(__APPLE__) || defined(PATHIE_ASSUME_UTF8_ON_UNIX)
+  // The filename encoding is taken to be UTF-8 in this configuration,
+  // so there is nothing to convert. Returning here also keeps this
+  // branch free of any reference to the locale's codeset, which is
+  // only queried in the other branch.
+  return std::string(utf8);
+#else
+  const char* fsencoding = nl_langinfo(CODESET);
+
+  // Skip the expensive convert_encodings() call if the filesystem
+  // encoding already is UTF-8.
+  if (strcmp(fsencoding, "UTF-8") == 0)
+    return std::string(utf8);
+
+  return convert_encodings("UTF-8", fsencoding, utf8);
+#endif
+}
+
+/**
+ * Converts the given string in native filesystem encoding to
+ * UTF-8.
+ */
+std::string Pathie::filename_to_utf8(const std::string& native_filename)
+{
+#if defined(__APPLE__) || defined(PATHIE_ASSUME_UTF8_ON_UNIX)
+  // The filename encoding is taken to be UTF-8 in this configuration,
+  // so there is nothing to convert. Returning here also keeps this
+  // branch free of any reference to the locale's codeset, which is
+  // only queried in the other branch.
+  return std::string(native_filename);
+#else
+  const char* fsencoding = nl_langinfo(CODESET);
+
+  // Skip the expensive convert_encodings() call if the filesystem
+  // encoding already is UTF-8.
+  if (strcmp(fsencoding, "UTF-8") == 0)
+    return std::string(native_filename);
+
+  return convert_encodings(fsencoding, "UTF-8", native_filename);
+#endif
+}
+#endif
diff --git a/src/3rd_party/pathie-cpp/src/pathie_ifstream.cpp b/src/3rd_party/pathie-cpp/src/pathie_ifstream.cpp
new file mode 100644
index 00000000..06b80731
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/src/pathie_ifstream.cpp
@@ -0,0 +1,320 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../include/pathie_ifstream.hpp"
+
+#include <cstdlib>
+
+namespace Pathie {
+#if defined(_PATHIE_UNIX)
+  // UNIX: std::ifstream accepts a narrow path directly, so every
+  // constructor below only forwards to the matching base constructor.
+
+  /// Opens the file named by the native representation of `path`.
+  Pathie::ifstream::ifstream(Pathie::Path path, std::ios_base::openmode mode)
+    : std::ifstream(path.native().c_str(), mode)
+  {
+  }
+
+  /// Creates a stream that is not attached to any file.
+  Pathie::ifstream::ifstream()
+    : std::ifstream()
+  {
+  }
+
+  /// Opens the file named by `path` after passing it through utf8_to_filename().
+  Pathie::ifstream::ifstream(std::string path, std::ios_base::openmode mode)
+    : std::ifstream(utf8_to_filename(path).c_str(), mode)
+  {
+  }
+
+  /// Same as the std::string overload, for a C string.
+  Pathie::ifstream::ifstream(char* path, std::ios_base::openmode mode)
+    : std::ifstream(utf8_to_filename(path).c_str(), mode)
+  {
+  }
+
+  /// Opens `filename` after passing it through utf8_to_filename().
+  void Pathie::ifstream::open(const char* filename, ios_base::openmode mode)
+  {
+    std::ifstream::open(utf8_to_filename(filename).c_str(), mode);
+  }
+
+  /// Opens `filename` after passing it through utf8_to_filename().
+  void Pathie::ifstream::open(const std::string& filename, ios_base::openmode mode)
+  {
+    const std::string native_name = utf8_to_filename(filename);
+    std::ifstream::open(native_name.c_str(), mode);
+  }
+
+  /// Opens the file named by the native representation of `filename`.
+  void Pathie::ifstream::open(const Pathie::Path& filename, ios_base::openmode mode)
+  {
+    const std::string native_name = filename.native();
+    std::ifstream::open(native_name.c_str(), mode);
+  }
+
+
+#elif defined (_WIN32)
+#  if defined(_MSC_VER)
+  // MSVC on Windows: Microsoft ships a nonstandard std::ifstream
+  // constructor for Unicode filenames, so forwarding is enough.
+  // It is documented here: http://msdn.microsoft.com/en-us/library/8et8s826.aspx
+
+  /// Opens the file named by the native representation of `path`.
+  Pathie::ifstream::ifstream(Pathie::Path path, std::ios_base::openmode mode)
+    : std::ifstream(path.native(), mode)
+  {
+  }
+
+  /// Creates a stream that is not attached to any file.
+  Pathie::ifstream::ifstream()
+    : std::ifstream()
+  {
+  }
+
+  /// Opens the file named by `path`.
+  /// NOTE(review): unlike the UNIX branch, `path` is forwarded without any
+  /// encoding conversion -- confirm that this is intended.
+  Pathie::ifstream::ifstream(std::string path, std::ios_base::openmode mode)
+    : std::ifstream(path, mode)
+  {
+  }
+
+  /// Opens the file named by the C string `path`, forwarded as-is.
+  Pathie::ifstream::ifstream(char* path, std::ios_base::openmode mode)
+    : std::ifstream(path, mode)
+  {
+  }
+#  elif defined(__GNUC__)
+  // This one is tough, but solveable. There’s a nonstandard C++ extension by the
+  // GCC team to create a C++ stream from a file descriptor and similar.
+  // It is documented here: https://gcc.gnu.org/onlinedocs/gcc-4.9.2/libstdc++/api/a00054.html
+
+  /**
+   * Default constructor for deferred initialisation via open().
+   * Beware that before you called open(), any methods other than
+   * is_open() may behave unexpectedly!
+   */
+  Pathie::ifstream::ifstream()
+    : std::basic_istream<char, std::char_traits<char> >()
+  {
+    mp_file = NULL;
+    m_buffer_allocated = false;
+
+    // Zero-filled raw storage that open() later constructs the file
+    // buffer into; see the lengthy explanation in open().
+    const size_t buffer_bytes = sizeof(__gnu_cxx::stdio_filebuf<char>);
+    mp_filebuffer = static_cast<__gnu_cxx::stdio_filebuf<char>*>(malloc(buffer_bytes));
+    memset(mp_filebuffer, '\0', buffer_bytes);
+
+    this->init(mp_filebuffer);
+  }
+
+  /**
+   * Construct a stream and immediately open the given UTF-8 file path.
+   *
+   * \param[in] filename The path to open the stream for. UTF-8.
+   * \param     mode Mode to open the file in.
+   */
+  Pathie::ifstream::ifstream(const char* filename, ios_base::openmode mode)
+    : std::basic_istream<char, std::char_traits<char> >()
+  {
+    mp_file = NULL;
+    m_buffer_allocated = false;
+
+    // Zero-filled raw storage that open() constructs the file buffer
+    // into; see the lengthy explanation in open().
+    const size_t buffer_bytes = sizeof(__gnu_cxx::stdio_filebuf<char>);
+    mp_filebuffer = static_cast<__gnu_cxx::stdio_filebuf<char>*>(malloc(buffer_bytes));
+    memset(mp_filebuffer, '\0', buffer_bytes);
+
+    this->init(mp_filebuffer);
+    this->open(filename, mode);
+  }
+
+  /**
+   * Construct a stream and immediately open the given UTF-8 file path.
+   *
+   * \param[in] filename The path to open the stream for. UTF-8.
+   * \param     mode Mode to open the file in.
+   */
+  Pathie::ifstream::ifstream(const std::string& filename, ios_base::openmode mode)
+    : std::basic_istream<char, std::char_traits<char> >()
+  {
+    mp_file = NULL;
+    m_buffer_allocated = false;
+
+    // Zero-filled raw storage that open() constructs the file buffer
+    // into; see the lengthy explanation in open().
+    const size_t buffer_bytes = sizeof(__gnu_cxx::stdio_filebuf<char>);
+    mp_filebuffer = static_cast<__gnu_cxx::stdio_filebuf<char>*>(malloc(buffer_bytes));
+    memset(mp_filebuffer, '\0', buffer_bytes);
+
+    this->init(mp_filebuffer);
+    this->open(filename, mode);
+  }
+
+  /**
+   * Construct a stream and immediately open the given Pathie::Path.
+   *
+   * \param[in] filename The path to open the stream for. A Pathie::Path instance.
+   * \param     mode Mode to open the file in.
+   */
+  Pathie::ifstream::ifstream(const Pathie::Path& filename, ios_base::openmode mode)
+    : std::basic_istream<char, std::char_traits<char> >()
+  {
+    mp_file = NULL;
+    m_buffer_allocated = false;
+
+    // Zero-filled raw storage that open() constructs the file buffer
+    // into; see the lengthy explanation in open().
+    const size_t buffer_bytes = sizeof(__gnu_cxx::stdio_filebuf<char>);
+    mp_filebuffer = static_cast<__gnu_cxx::stdio_filebuf<char>*>(malloc(buffer_bytes));
+    memset(mp_filebuffer, '\0', buffer_bytes);
+
+    this->init(mp_filebuffer);
+    this->open(filename, mode);
+  }
+
+  /**
+   * Destructor. Closes the stream if it is still open and then releases
+   * the raw storage that was reserved for the file buffer.
+   */
+  Pathie::ifstream::~ifstream()
+  {
+    // m_buffer_allocated is only true between a successful open() and the
+    // matching close(), so this never closes the same file twice.
+    if (m_buffer_allocated) {
+      try {
+        close();
+      }
+      catch (...) {
+        // close() may set failbit, which throws if the caller enabled
+        // stream exceptions; a destructor must not let that escape.
+      }
+    }
+
+    free(mp_filebuffer);
+  }
+
+  /**
+   * Returns the pointer to the storage holding this stream's file buffer.
+   */
+  __gnu_cxx::stdio_filebuf<char>* Pathie::ifstream::rdbuf() const
+  {
+    return this->mp_filebuffer;
+  }
+
+  /**
+   * Tells whether a file is currently attached to this stream. This is
+   * the only method safe to use before you called open() on a stream
+   * constructed with the default constructor (apart from open() itself
+   * of course).
+   */
+  bool Pathie::ifstream::is_open() const
+  {
+    // The buffer object only exists after open(); do not touch it before.
+    return m_buffer_allocated && mp_filebuffer->is_open();
+  }
+
+  /**
+   * Open the given UTF-8 file path in this stream. You can call this anytime
+   * after you constructed an instance with the default constructor; otherwise,
+   * you have to close() whatever was opened before you call this method.
+   *
+   * On failure the stream's failbit is set.
+   *
+   * \param[in] filename UTF-8 filename to open
+   * \param     mode Mode to open the stream in.
+   */
+  void Pathie::ifstream::open(const char* filename, ios_base::openmode mode)
+  {
+    std::wstring w_filename = Pathie::utf8_to_utf16(filename);
+
+    // Honour ios_base::binary when opening the C stream: without the "b"
+    // flag the C runtime opens the file in its default (text) translation
+    // mode, and the file buffer created below sits on top of this FILE*.
+    const wchar_t* fopen_mode = (mode & ios_base::binary) ? L"rb" : L"r";
+
+    mp_file = _wfopen(w_filename.c_str(), fopen_mode);
+    if (!mp_file) {
+      setstate(ios_base::failbit);
+      return;
+    }
+
+    /* The following construction uses a “placement new” as it appears
+     * to be the only "clean" solution applicable. The init() method,
+     * an internal of the GCC implementation of basic_istream that
+     * needs to be called in the stream’s constructor, requires a
+     * pointer to the filebuffer object. However, we do not have that
+     * filebuffer object at hand in the constructor, the
+     * __gnu_cxx::stdio_filebuf instance will be created later when
+     * open() is called. It is impossible to construct it earlier,
+     * because it does not support a delayed open() call, the file
+     * descriptor or FILE* pointer must be passed during its
+     * construction, but we don’t have it there; it is available only
+     * in open() -- remember that you can create the ifstream instance
+     * without being attached to a file and then call open() later
+     * with a filename. To be able to pass something meaningful to
+     * init(), we have to "foresee" where in memory the stdio_filebuf
+     * instance will be created. This only is possible with a
+     * placement new into a place we have allocated previously using
+     * malloc().
+     *
+     * An alternative would be to use internal GCC APIs by duplicating
+     * the sourcecode of the __gnu_cxx::stdio_filebuf constructor; however
+     * undocumented internal APIs are never good to use. For informational
+     * purposes therefore the sourcecode link:
+     *
+     *   https://gcc.gnu.org/onlinedocs/gcc-4.9.2/libstdc++/api/a01222_source.html
+     */
+
+    new (mp_filebuffer) __gnu_cxx::stdio_filebuf<char>(mp_file, mode);
+    m_buffer_allocated = true;
+
+    if (!mp_filebuffer->is_open())
+      setstate(ios_base::failbit);
+    else
+      clear();
+  }
+
+  /**
+   * Open the given UTF-8 file path in this stream; forwards to the
+   * C-string overload. You can call this anytime after you constructed
+   * an instance with the default constructor; otherwise, you have to
+   * close() whatever was opened before you call this method.
+   *
+   * \param[in] filename UTF-8 filename to open
+   * \param     mode Mode to open the stream in.
+   */
+  void Pathie::ifstream::open(const std::string& filename, ios_base::openmode mode)
+  {
+    const char* utf8_name = filename.c_str();
+    this->open(utf8_name, mode);
+  }
+
+  /**
+   * Open the given Pathie::Path in this stream; forwards the path's
+   * str() value to the string overload. You can call this anytime after
+   * you constructed an instance with the default constructor; otherwise,
+   * you have to close() whatever was opened before you call this method.
+   *
+   * \param[in] filename Pathie::Path to open the stream for.
+   * \param     mode Mode to open the stream in.
+   */
+  void Pathie::ifstream::open(const Pathie::Path& filename, ios_base::openmode mode)
+  {
+    this->open(filename.str(), mode);
+  }
+
+  /**
+   * Close the underlying file. Has no effect if no file is opened, which
+   * includes calling it a second time after a successful close().
+   * Sets the stream's failbit if closing the file buffer fails.
+   */
+  void Pathie::ifstream::close()
+  {
+    if (!mp_file)
+      return;
+
+    // The buffer object only exists once open() has constructed it.
+    if (m_buffer_allocated) {
+      if (!mp_filebuffer->close())
+        setstate(ios_base::failbit);
+
+      // open() created the buffer with placement new, so its destructor
+      // has to be run by hand before the storage is wiped; otherwise
+      // whatever the buffer object owns would be leaked.
+      typedef __gnu_cxx::stdio_filebuf<char> filebuf_type;
+      mp_filebuffer->~filebuf_type();
+    }
+
+    // Do not deallocate, we may need it later if an open() call follows.
+    memset(mp_filebuffer, '\0', sizeof(__gnu_cxx::stdio_filebuf<char>));
+    m_buffer_allocated = false;
+
+    fclose(mp_file);
+    // Forget the handle so that a repeated close() is a no-op instead of
+    // closing the same FILE* twice.
+    mp_file = NULL;
+  }
+#  else
+#    error Unsupported compiler: do not know how to open C++ stream on Unicode file.
+#  endif
+#else
+#  error Unsupported system.
+#endif
+};
diff --git a/src/3rd_party/pathie-cpp/src/pathie_ofstream.cpp b/src/3rd_party/pathie-cpp/src/pathie_ofstream.cpp
new file mode 100644
index 00000000..f1085043
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/src/pathie_ofstream.cpp
@@ -0,0 +1,326 @@
+/* -*- coding: utf-8 -*-
+ * This file is part of Pathie.
+ *
+ * Copyright © 2015, 2017 Marvin Gülker
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../include/pathie_ofstream.hpp"
+
+#if defined(_WIN32) && defined(__GNUC__)
+#include <cstdio>
+#include <cstdlib>
+#endif
+
+namespace Pathie {
+#if defined(_PATHIE_UNIX)
+  // UNIX: std::ofstream accepts a narrow path directly, so every
+  // constructor below only forwards to the matching base constructor.
+
+  /// Opens the file named by the native representation of `path`.
+  Pathie::ofstream::ofstream(Pathie::Path path, std::ios_base::openmode mode)
+    : std::ofstream(path.native().c_str(), mode)
+  {
+  }
+
+  /// Creates a stream that is not attached to any file.
+  Pathie::ofstream::ofstream()
+    : std::ofstream()
+  {
+  }
+
+  /// Opens the file named by `path` after passing it through utf8_to_filename().
+  Pathie::ofstream::ofstream(std::string path, std::ios_base::openmode mode)
+    : std::ofstream(utf8_to_filename(path).c_str(), mode)
+  {
+  }
+
+  /// Same as the std::string overload, for a C string.
+  Pathie::ofstream::ofstream(char* path, std::ios_base::openmode mode)
+    : std::ofstream(utf8_to_filename(path).c_str(), mode)
+  {
+  }
+
+  /// Opens `filename` after passing it through utf8_to_filename().
+  void Pathie::ofstream::open(const char* filename, ios_base::openmode mode)
+  {
+    std::ofstream::open(utf8_to_filename(filename).c_str(), mode);
+  }
+
+  /// Opens `filename` after passing it through utf8_to_filename().
+  void Pathie::ofstream::open(const std::string& filename, ios_base::openmode mode)
+  {
+    const std::string native_name = utf8_to_filename(filename);
+    std::ofstream::open(native_name.c_str(), mode);
+  }
+
+  /// Opens the file named by the native representation of `filename`.
+  void Pathie::ofstream::open(const Pathie::Path& filename, ios_base::openmode mode)
+  {
+    const std::string native_name = filename.native();
+    std::ofstream::open(native_name.c_str(), mode);
+  }
+
+
+#elif defined (_WIN32)
+#  if defined(_MSC_VER)
+  // MSVC on Windows: Microsoft ships a nonstandard std::ofstream
+  // constructor for Unicode filenames, so forwarding is enough.
+  // It is documented here: http://msdn.microsoft.com/en-us/library/8et8s826.aspx
+
+  /// Opens the file named by the native representation of `path`.
+  Pathie::ofstream::ofstream(Pathie::Path path, std::ios_base::openmode mode)
+    : std::ofstream(path.native(), mode)
+  {
+  }
+
+  /// Creates a stream that is not attached to any file.
+  Pathie::ofstream::ofstream()
+    : std::ofstream()
+  {
+  }
+
+  /// Opens the file named by `path`.
+  /// NOTE(review): unlike the UNIX branch, `path` is forwarded without any
+  /// encoding conversion -- confirm that this is intended.
+  Pathie::ofstream::ofstream(std::string path, std::ios_base::openmode mode)
+    : std::ofstream(path, mode)
+  {
+  }
+
+  /// Opens the file named by the C string `path`, forwarded as-is.
+  Pathie::ofstream::ofstream(char* path, std::ios_base::openmode mode)
+    : std::ofstream(path, mode)
+  {
+  }
+#  elif defined(__GNUC__)
+  // This one is tough, but solveable. There’s a nonstandard C++ extension by the
+  // GCC team to create a C++ stream from a file descriptor and similar.
+  // It is documented here: https://gcc.gnu.org/onlinedocs/gcc-4.9.2/libstdc++/api/a00054.html
+
+  /**
+   * Default constructor for deferred initialisation via open().
+   * Beware that before you called open(), any methods other than
+   * is_open() may behave unexpectedly!
+   */
+  Pathie::ofstream::ofstream()
+    : std::basic_ostream<char, std::char_traits<char> >()
+  {
+    mp_file = NULL;
+    m_buffer_allocated = false;
+
+    // Zero-filled raw storage that open() later constructs the file
+    // buffer into; see the lengthy explanation in open().
+    const size_t buffer_bytes = sizeof(__gnu_cxx::stdio_filebuf<char>);
+    mp_filebuffer = static_cast<__gnu_cxx::stdio_filebuf<char>*>(malloc(buffer_bytes));
+    memset(mp_filebuffer, '\0', buffer_bytes);
+
+    this->init(mp_filebuffer);
+  }
+
+  /**
+   * Construct a stream and immediately open the given UTF-8 file path.
+   *
+   * \param[in] filename The path to open the stream for. UTF-8.
+   * \param     mode Mode to open the file in.
+   */
+  Pathie::ofstream::ofstream(const char* filename, ios_base::openmode mode)
+    : std::basic_ostream<char, std::char_traits<char> >()
+  {
+    mp_file = NULL;
+    m_buffer_allocated = false;
+
+    // Zero-filled raw storage that open() constructs the file buffer
+    // into; see the lengthy explanation in open().
+    const size_t buffer_bytes = sizeof(__gnu_cxx::stdio_filebuf<char>);
+    mp_filebuffer = static_cast<__gnu_cxx::stdio_filebuf<char>*>(malloc(buffer_bytes));
+    memset(mp_filebuffer, '\0', buffer_bytes);
+
+    this->init(mp_filebuffer);
+    this->open(filename, mode);
+  }
+
+  /**
+   * Construct a stream and immediately open the given UTF-8 file path.
+   *
+   * \param[in] filename The path to open the stream for. UTF-8.
+   * \param     mode Mode to open the file in.
+   */
+  Pathie::ofstream::ofstream(const std::string& filename, ios_base::openmode mode)
+    : std::basic_ostream<char, std::char_traits<char> >()
+  {
+    mp_file = NULL;
+    m_buffer_allocated = false;
+
+    // Zero-filled raw storage that open() constructs the file buffer
+    // into; see the lengthy explanation in open().
+    const size_t buffer_bytes = sizeof(__gnu_cxx::stdio_filebuf<char>);
+    mp_filebuffer = static_cast<__gnu_cxx::stdio_filebuf<char>*>(malloc(buffer_bytes));
+    memset(mp_filebuffer, '\0', buffer_bytes);
+
+    this->init(mp_filebuffer);
+    this->open(filename, mode);
+  }
+
+  /**
+   * Construct a stream and immediately open the given Pathie::Path.
+   *
+   * \param[in] filename The path to open the stream for. A Pathie::Path instance.
+   * \param     mode Mode to open the file in.
+   */
+  Pathie::ofstream::ofstream(const Pathie::Path& filename, ios_base::openmode mode)
+    : std::basic_ostream<char, std::char_traits<char> >()
+  {
+    mp_file = NULL;
+    m_buffer_allocated = false;
+
+    // Zero-filled raw storage that open() constructs the file buffer
+    // into; see the lengthy explanation in open().
+    const size_t buffer_bytes = sizeof(__gnu_cxx::stdio_filebuf<char>);
+    mp_filebuffer = static_cast<__gnu_cxx::stdio_filebuf<char>*>(malloc(buffer_bytes));
+    memset(mp_filebuffer, '\0', buffer_bytes);
+
+    this->init(mp_filebuffer);
+    this->open(filename, mode);
+  }
+
+  /**
+   * Destructor. Closes the stream if it is still open, so that pending
+   * output is written and the file handle is released, and then frees
+   * the raw storage that was reserved for the file buffer.
+   */
+  Pathie::ofstream::~ofstream()
+  {
+    // m_buffer_allocated is only true between a successful open() and the
+    // matching close(), so this never closes the same file twice.
+    if (m_buffer_allocated) {
+      try {
+        close();
+      }
+      catch (...) {
+        // close() may set failbit, which throws if the caller enabled
+        // stream exceptions; a destructor must not let that escape.
+      }
+    }
+
+    free(mp_filebuffer);
+  }
+
+  /**
+   * Returns the pointer to the storage holding this stream's file buffer.
+   */
+  __gnu_cxx::stdio_filebuf<char>* Pathie::ofstream::rdbuf() const
+  {
+    return this->mp_filebuffer;
+  }
+
+  /**
+   * Tells whether a file is currently attached to this stream. This is
+   * the only method safe to use before you called open() on a stream
+   * constructed with the default constructor (apart from open() itself
+   * of course).
+   */
+  bool Pathie::ofstream::is_open() const
+  {
+    // The buffer object only exists after open(); do not touch it before.
+    return m_buffer_allocated && mp_filebuffer->is_open();
+  }
+
+  /**
+   * Open the given UTF-8 file path in this stream. You can call this anytime
+   * after you constructed an instance with the default constructor; otherwise,
+   * you have to close() whatever was opened before you call this method.
+   *
+   * The file is truncated unless `mode` contains ios_base::app (append) or
+   * contains ios_base::in without ios_base::trunc (update in place, the
+   * file must exist). ios_base::binary selects binary mode.
+   *
+   * On failure the stream's failbit is set.
+   *
+   * \param[in] filename UTF-8 filename to open
+   * \param     mode Mode to open the stream in.
+   */
+  void Pathie::ofstream::open(const char* filename, ios_base::openmode mode)
+  {
+    std::wstring w_filename = Pathie::utf8_to_utf16(filename);
+
+    // Map the openmode onto the fopen() mode string the way
+    // std::basic_filebuf::open() does, so that this branch agrees with the
+    // UNIX and MSVC branches, which delegate to std::ofstream. In
+    // particular a plain ios_base::out truncates rather than appends.
+    const bool binary = (mode & ios_base::binary) != 0;
+    const wchar_t* fopen_mode;
+    if (mode & ios_base::app)
+      fopen_mode = binary ? L"ab" : L"a";
+    else if ((mode & ios_base::in) && !(mode & ios_base::trunc))
+      fopen_mode = binary ? L"r+b" : L"r+";
+    else
+      fopen_mode = binary ? L"wb" : L"w";
+
+    mp_file = _wfopen(w_filename.c_str(), fopen_mode);
+
+    if (!mp_file) {
+      setstate(ios_base::failbit);
+      return;
+    }
+
+    /* The following construction uses a “placement new” as it appears
+     * to be the only "clean" solution applicable. The init() method,
+     * an internal of the GCC implementation of basic_ostream that
+     * needs to be called in the stream’s constructor, requires a
+     * pointer to the filebuffer object. However, we do not have that
+     * filebuffer object at hand in the constructor, the
+     * __gnu_cxx::stdio_filebuf instance will be created later when
+     * open() is called. It is impossible to construct it earlier,
+     * because it does not support a delayed open() call, the file
+     * descriptor or FILE* pointer must be passed during its
+     * construction, but we don’t have it there; it is available only
+     * in open() -- remember that you can create the ofstream instance
+     * without being attached to a file and then call open() later
+     * with a filename. To be able to pass something meaningful to
+     * init(), we have to "foresee" where in memory the stdio_filebuf
+     * instance will be created. This only is possible with a
+     * placement new into a place we have allocated previously using
+     * malloc().
+     *
+     * An alternative would be to use internal GCC APIs by duplicating
+     * the sourcecode of the __gnu_cxx::stdio_filebuf constructor; however
+     * undocumented internal APIs are never good to use. For informational
+     * purposes therefore the sourcecode link:
+     *
+     *   https://gcc.gnu.org/onlinedocs/gcc-4.9.2/libstdc++/api/a01222_source.html
+     */
+
+    new (mp_filebuffer) __gnu_cxx::stdio_filebuf<char>(mp_file, mode);
+    m_buffer_allocated = true;
+
+    if (!mp_filebuffer->is_open())
+      setstate(ios_base::failbit);
+    else
+      clear();
+  }
+
+  /**
+   * Open the given UTF-8 file path in this stream; forwards to the
+   * C-string overload. You can call this anytime after you constructed
+   * an instance with the default constructor; otherwise, you have to
+   * close() whatever was opened before you call this method.
+   *
+   * \param[in] filename UTF-8 filename to open
+   * \param     mode Mode to open the stream in.
+   */
+  void Pathie::ofstream::open(const std::string& filename, ios_base::openmode mode)
+  {
+    const char* utf8_name = filename.c_str();
+    this->open(utf8_name, mode);
+  }
+
+  /**
+   * Open the given Pathie::Path in this stream; forwards the path's
+   * str() value to the string overload. You can call this anytime after
+   * you constructed an instance with the default constructor; otherwise,
+   * you have to close() whatever was opened before you call this method.
+   *
+   * \param[in] filename Pathie::Path to open the stream for.
+   * \param     mode Mode to open the stream in.
+   */
+  void Pathie::ofstream::open(const Pathie::Path& filename, ios_base::openmode mode)
+  {
+    this->open(filename.str(), mode);
+  }
+
+  /**
+   * Close the underlying file. Has no effect if no file is opened, which
+   * includes calling it a second time after a successful close().
+   * Sets the stream's failbit if closing the file buffer fails.
+   */
+  void Pathie::ofstream::close()
+  {
+    if (!mp_file)
+      return;
+
+    // The buffer object only exists once open() has constructed it.
+    if (m_buffer_allocated) {
+      if (!mp_filebuffer->close())
+        setstate(ios_base::failbit);
+
+      // open() created the buffer with placement new, so its destructor
+      // has to be run by hand before the storage is wiped; otherwise
+      // whatever the buffer object owns would be leaked.
+      typedef __gnu_cxx::stdio_filebuf<char> filebuf_type;
+      mp_filebuffer->~filebuf_type();
+    }
+
+    // Do not deallocate, we may need it later if an open() call follows.
+    memset(mp_filebuffer, '\0', sizeof(__gnu_cxx::stdio_filebuf<char>));
+    m_buffer_allocated = false;
+
+    fclose(mp_file);
+    // Forget the handle so that a repeated close() is a no-op instead of
+    // closing the same FILE* twice.
+    mp_file = NULL;
+  }
+
+#  else
+#    error Unsupported compiler: do not know how to open C++ stream on Unicode file.
+#  endif
+#else
+#  error Unsupported system.
+#endif
+};
diff --git a/src/3rd_party/pathie-cpp/src/temp.cpp b/src/3rd_party/pathie-cpp/src/temp.cpp
new file mode 100644
index 00000000..ae51bf87
--- /dev/null
+++ b/src/3rd_party/pathie-cpp/src/temp.cpp
@@ -0,0 +1,197 @@
+#include "../include/temp.hpp"
+#include <sstream>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+
+#if defined(_PATHIE_UNIX)
+#include <sys/types.h>
+#include <unistd.h>
+#elif defined(_WIN32)
+#include <Windows.h>
+#else
+#error Unsupported system
+#endif
+
+using namespace Pathie;
+
+// Builds a candidate basename of the form
+// "<namepart>-<seconds since epoch><rand()><process id>".
+static std::string generate_random_filename(const std::string& namepart)
+{
+  std::ostringstream buffer;
+
+  buffer << namepart;
+  buffer << "-";
+  buffer << time(NULL);
+  buffer << rand();
+
+  // The process id comes from a platform-specific call.
+#if defined(_PATHIE_UNIX)
+  buffer << getpid();
+#elif defined(_WIN32)
+  buffer << GetCurrentProcessId();
+#else
+#error Unsupported system
+#endif
+
+  return buffer.str();
+}
+
+/**
+ * TempEntry is not meant to be instantiated on its own. This
+ * constructor does the work shared by the Tempdir and Tempfile
+ * classes: it picks a path below Path::temp_dir() that does not
+ * exist at the time of the check. Nothing is created on disk here.
+ *
+ * \param namepart
+ * A string that will be included verbatim into the basename
+ * of the generated path.
+ *
+ * \remark The generated path name is of form
+ * `<namepart>-<currenttime><random><pid>`. However, future releases
+ * may change this format, so do not rely on it.
+ */
+TempEntry::TempEntry(std::string namepart)
+  : m_keep(false)
+{
+  // Keep drawing candidate names until one is free.
+  for (;;) {
+    m_path = Path::temp_dir() / generate_random_filename(namepart);
+
+    if (!m_path.exists())
+      break;
+  }
+}
+
+/**
+ * Destructor. Does nothing itself; the subclasses' destructors
+ * take care of removing what they created.
+ */
+TempEntry::~TempEntry()
+{
+}
+
+/**
+ * Returns a copy of the path to the temporary entry that
+ * was chosen by the constructor.
+ */
+Path TempEntry::path() const
+{
+  return this->m_path;
+}
+
+/**
+ * Sets whether the destructor should leave the temporary entry
+ * on disk. You can still expressly delete the temporary entry
+ * by calling remove().
+ *
+ * \param k
+ * If true (default), the destructor will not delete the temporary entry.
+ * If false, the destructor will delete the temporary entry.
+ */
+void TempEntry::keep(bool k)
+{
+  this->m_keep = k;
+}
+
+/**
+ * Returns the flag last set with keep() (false if keep() was never called).
+ */
+bool TempEntry::is_kept() const
+{
+  return this->m_keep;
+}
+
+/**
+ * Constructs an instance of this class. A temporary directory
+ * is created that will be recursively removed when the object
+ * is destroyed, unless keep() has been called.
+ *
+ * \param namepart
+ * A string that will be included verbatim into the basename
+ * of the created directory.
+ *
+ * \returns The newly created instance.
+ *
+ * \remark There is a small timespan between the generation of the
+ * temporary path name and the creation of the directory in which it
+ * is theoretically possible for another process to create an entry
+ * that conflicts with the generated name. However, since the
+ * generated name includes a random number, the process identifier,
+ * and the number of seconds since epoch as well as the given
+ * `namepart`, the chance of an accidental collision is very low.
+ * Even a malicious attacker would have to guess the random number, so
+ * if your `srand()` seed is chosen properly and your C standard
+ * library is properly implemented, this risk is again very low.
+ */
+Tempdir::Tempdir(std::string namepart)
+  : TempEntry(namepart)
+{
+  // The base class only picked the name; create the directory now.
+  this->m_path.mktree();
+}
+
+/**
+ * Destructor, removes the temporary directory unless keep() has been
+ * called. Does nothing if the temporary directory does not exist
+ * anymore for whatever reason.
+ */
+Tempdir::~Tempdir()
+{
+  if (m_keep)
+    return;
+
+  remove();
+}
+
+/**
+ * Recursively removes the temporary directory. This method
+ * ignores what was set with keep(), i.e., it *always* deletes
+ * the temporary directory if you call it. If the directory
+ * does not exist anymore, for whatever reason, nothing happens.
+ */
+void Tempdir::remove() const
+{
+  if (!m_path.exists())
+    return;
+
+  m_path.rmtree();
+}
+
+/**
+ * Constructs an instance of this class. A temporary file
+ * is created that will be removed when the object is
+ * destroyed, unless keep() has been called.
+ *
+ * \param namepart
+ * A string that will be included verbatim into the basename
+ * of the created filename.
+ *
+ * \returns The newly created instance.
+ *
+ * \remark There is a small timespan between the generation of the
+ * temporary path name and the creation of the file in which it
+ * is theoretically possible for another process to create an entry
+ * that conflicts with the generated name. However, since the
+ * generated name includes a random number, the process identifier,
+ * and the number of seconds since epoch as well as the given
+ * `namepart`, the chance of an accidental collision is very low.
+ * Even a malicious attacker would have to guess the random number, so
+ * if your `srand()` seed is chosen properly and your C standard
+ * library is properly implemented, this risk is again very low.
+ */
+Tempfile::Tempfile(std::string namepart)
+  : TempEntry(namepart)
+{
+  // The base class only picked the name; create the file now.
+  this->m_path.touch();
+}
+
+/**
+ * Destructor, removes the temporary file unless keep() has been called.
+ * Does nothing if the temporary file does not exist anymore for whatever
+ * reason.
+ */
+Tempfile::~Tempfile()
+{
+  if (m_keep)
+    return;
+
+  remove();
+}
+
+/**
+ * Removes the temporary file. This method
+ * ignores what was set with keep(), i.e., it *always* deletes
+ * the temporary file if you call it. If the file does not
+ * exist anymore, for whatever reason, nothing happens.
+ */
+void Tempfile::remove() const
+{
+  if (!m_path.exists())
+    return;
+
+  m_path.unlink();
+}
diff --git a/src/3rd_party/sentencepiece b/src/3rd_party/sentencepiece
new file mode 160000
+Subproject 1a38d26a13cc67b1aae641d4983b624bef6d530
diff --git a/src/3rd_party/zstr/LICENSE b/src/3rd_party/zstr/LICENSE
new file mode 100644
index 00000000..841c7214
--- /dev/null
+++ b/src/3rd_party/zstr/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE. 
diff --git a/src/3rd_party/zstr/README.org b/src/3rd_party/zstr/README.org
new file mode 100644
index 00000000..bc0dd3e5
--- /dev/null
+++ b/src/3rd_party/zstr/README.org
@@ -0,0 +1,54 @@
+# -*- mode:org; mode:visual-line; coding:utf-8; -*-
+
+** A C++ ZLib wrapper
+
+[[http://travis-ci.org/mateidavid/zstr][http://travis-ci.org/mateidavid/zstr.svg?branch=master]] [[https://tldrlegal.com/license/mit-license][http://img.shields.io/:license-mit-blue.svg]]
+
+This C++ header-only library enables the use of C++ standard iostreams to access ZLib-compressed streams.
+
+For input access (decompression), the compression format is auto-detected, and multiple concatenated compressed streams are decompressed seamlessly.
+
+For output access (compression), the only parameter exposed by this API is the compression level.
+
+Alternatives to this library include:
+
+- The original [[http://www.zlib.net/][ZLib]], through its [[http://www.zlib.net/manual.html][C API]]. This does not interact nicely with C++ iostreams.
+
+- The [[http://www.cs.unc.edu/Research/compgeom/gzstream/][GZStream]] library. This library does not auto-detect input compression, and it cannot wrap streams (only files).
+
+- The [[http://www.boost.org/doc/libs/release/libs/iostreams/][Boost IOStreams]] library. The library does not auto-detect input compression (by default, though that can be easily implemented with filters), and more importantly, it is not a header-only Boost library.
+
+For an example usage, see [[examples/ztxtpipe.cpp]] and [[examples/zc.cpp]].
+
+**** Input Auto-detection
+
+For input access, the library seamlessly auto-detects whether the source stream is compressed or not. The following compressed streams are detected:
+
+- GZip header, when stream starts with =1F 8B=. See [[http://en.wikipedia.org/wiki/Gzip][GZip format]].
+
+- ZLib header, when stream starts with =78 01=, =78 9C=, and =78 DA=. See [[http://stackoverflow.com/a/17176881][answer here]].
+
+If none of these formats are detected, the library assumes the input is not compressed, and it produces a plain copy of the source stream.
+
+**** Classes
+
+The package provides 6 classes for accessing ZLib streams:
+
+- =zstr::istreambuf= is the core decompression class. This is constructed from an existing =std::streambuf= that contains source data. The =zstr::istreambuf= constructor accepts explicit settings for the internal buffer size (default: 1 MB) and the auto-detection option (default: on). ZLib errors cause exceptions to be thrown.
+
+- =zstr::ostreambuf= is the core compression class. This is constructed from an existing =std::streambuf= that contains sink data. The =zstr::ostreambuf= constructor accepts explicit settings for the internal buffer size (default: 1 MB) and the compression option (default: ZLib default). ZLib errors cause exceptions to be thrown.
+
+- =zstr::istream= is a wrapper for a =zstr::istreambuf= that accesses an /external/ =std::streambuf=. It can be constructed from an existing =std::istream= (such as =std::cin=) or =std::streambuf=.
+
+- =zstr::ostream= is a wrapper for a =zstr::ostreambuf= that accesses an /external/ =std::streambuf=. It can be constructed from an existing =std::ostream= (such as =std::cout=) or =std::streambuf=.
+
+- =zstr::ifstream= is a wrapper for a =zstr::istreambuf= that accesses an /internal/ =std::ifstream=. This can be used to open a file and read decompressed data from it.
+
+- =zstr::ofstream= is a wrapper for a =zstr::ostreambuf= that accesses an /internal/ =std::ofstream=. This can be used to open a file and write compressed data to it.
+
+For all stream objects, the =badbit= of their exception mask is turned on in order to propagate exceptions.
+
+**** License
+
+Released under the [[file:LICENSE][MIT license]].
+
diff --git a/src/3rd_party/zstr/strict_fstream.hpp b/src/3rd_party/zstr/strict_fstream.hpp
new file mode 100644
index 00000000..21173c73
--- /dev/null
+++ b/src/3rd_party/zstr/strict_fstream.hpp
@@ -0,0 +1,202 @@
+#ifndef __STRICT_FSTREAM_HPP
+#define __STRICT_FSTREAM_HPP
+
+#include <cassert>
+#include <fstream>
+#include <cstring>
+#include <string>
+
+/**
+ * This namespace defines wrappers for std::ifstream, std::ofstream, and
+ * std::fstream objects. The wrappers perform the following steps:
+ * - check the open modes make sense
+ * - check that the call to open() is successful
+ * - (for input streams) check that the opened file is peek-able
+ * - turn on the badbit in the exception mask
+ */
+namespace strict_fstream
+{
+
+/// Returns the message for the current `errno` as a std::string, using strerror_s (Windows) or
+/// whichever strerror_r flavour the platform provides. Ref: http://stackoverflow.com/a/901316/717706
+static std::string strerror()
+{
+    std::string buff(80, '\0');  // scratch space handed to the C API; starts out NUL-padded
+#ifdef _WIN32
+    if (strerror_s(&buff[0], buff.size(), errno) != 0)
+    {
+        buff = "Unknown error";
+    }
+#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && ! _GNU_SOURCE
+// XSI-compliant strerror_r(): returns an int status and writes the message into buff
+    if (strerror_r(errno, &buff[0], buff.size()) != 0)
+    {
+        buff = "Unknown error";
+    }
+#else
+// GNU-specific strerror_r(): returns a char* holding the message, so copy from that pointer
+    auto p = strerror_r(errno, &buff[0], buff.size());
+    std::string tmp(p, std::strlen(p));
+    std::swap(buff, tmp);
+#endif
+    buff.resize(std::strlen(buff.c_str()));  // trim NUL padding; find('\0') would give npos (=> std::length_error) when no NUL is left
+    return buff;
+}
+
+/// Exception class thrown by failed operations.
+class Exception
+    : public std::exception
+{
+public:
+    Exception(const std::string& msg) : _msg(msg) {}
+    const char * what() const noexcept { return _msg.c_str(); }
+private:
+    std::string _msg;
+}; // class Exception
+
+namespace detail
+{
+
+struct static_method_holder  // stateless bundle of the checks shared by the ifstream/ofstream/fstream wrappers
+{
+    static std::string mode_to_string(std::ios_base::openmode mode)  // e.g. in|binary -> "in|binary"; no known flag -> "none"
+    {
+        static const int n_modes = 6;
+        static const std::ios_base::openmode mode_val_v[n_modes] =
+            {
+                std::ios_base::in,
+                std::ios_base::out,
+                std::ios_base::app,
+                std::ios_base::ate,
+                std::ios_base::trunc,
+                std::ios_base::binary
+            };
+
+        static const char * const mode_name_v[n_modes] =  // parallel to mode_val_v, same order
+            {
+                "in",
+                "out",
+                "app",
+                "ate",
+                "trunc",
+                "binary"
+            };
+        std::string res;
+        for (int i = 0; i < n_modes; ++i)
+        {
+            if (mode & mode_val_v[i])
+            {
+                res += (! res.empty()? "|" : "");
+                res += mode_name_v[i];
+            }
+        }
+        if (res.empty()) res = "none";
+        return res;
+    }
+    static void check_mode(const std::string& filename, std::ios_base::openmode mode)  // throws Exception on contradictory flags; filename only feeds the message
+    {
+        if ((mode & std::ios_base::trunc) && ! (mode & std::ios_base::out))
+        {
+            throw Exception(std::string("strict_fstream: open('") + filename + "'): mode error: trunc and not out");
+        }
+        else if ((mode & std::ios_base::app) && ! (mode & std::ios_base::out))
+        {
+            throw Exception(std::string("strict_fstream: open('") + filename + "'): mode error: app and not out");
+        }
+        else if ((mode & std::ios_base::trunc) && (mode & std::ios_base::app))
+        {
+            throw Exception(std::string("strict_fstream: open('") + filename + "'): mode error: trunc and app");
+        }
+    }
+    static void check_open(std::ios * s_p, const std::string& filename, std::ios_base::openmode mode)  // throws Exception (with the errno text) if *s_p is in a failed state
+    {
+        if (s_p->fail())
+        {
+            throw Exception(std::string("strict_fstream: open('")
+                            + filename + "'," + mode_to_string(mode) + "): open failed: "
+                            + strerror());
+        }
+    }
+    static void check_peek(std::istream * is_p, const std::string& filename, std::ios_base::openmode mode)  // probes readability with peek(); throws Exception if the probe fails
+    {
+        bool peek_failed = true;  // stays true when peek() itself throws
+        try
+        {
+            is_p->peek();
+            peek_failed = is_p->fail();
+        }
+        catch (const std::ios_base::failure &) {}  // caught by const reference (no copy/slicing); the failure is reported just below
+        if (peek_failed)
+        {
+            throw Exception(std::string("strict_fstream: open('")
+                            + filename + "'," + mode_to_string(mode) + "): peek failed: "
+                            + strerror());
+        }
+        is_p->clear();  // reset whatever state flags the probe left behind
+    }
+}; // struct static_method_holder
+
+} // namespace detail
+
+class ifstream  // std::ifstream whose open() validates the mode, the open itself and readability
+    : public std::ifstream
+{
+public:
+    ifstream() = default;
+    ifstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in)
+    {
+        open(filename, mode);  // may throw strict_fstream::Exception
+    }
+    void open(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in)
+    {
+        mode |= std::ios_base::in;  // an input stream always reads
+        exceptions(std::ios_base::badbit);  // from now on badbit raises std::ios_base::failure
+        detail::static_method_holder::check_mode(filename, mode);  // reject contradictory flags before touching the file
+        std::ifstream::open(filename, mode);
+        detail::static_method_holder::check_open(this, filename, mode);  // throws if the open failed
+        detail::static_method_holder::check_peek(this, filename, mode);  // throws if the first peek() fails
+    }
+}; // class ifstream
+
+class ofstream  // std::ofstream whose open() validates the mode and the open itself
+    : public std::ofstream
+{
+public:
+    ofstream() = default;
+    ofstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::out)
+    {
+        open(filename, mode);  // may throw strict_fstream::Exception
+    }
+    void open(const std::string& filename, std::ios_base::openmode mode = std::ios_base::out)
+    {
+        mode |= std::ios_base::out;  // an output stream always writes
+        exceptions(std::ios_base::badbit);  // from now on badbit raises std::ios_base::failure
+        detail::static_method_holder::check_mode(filename, mode);  // reject contradictory flags before touching the file
+        std::ofstream::open(filename, mode);
+        detail::static_method_holder::check_open(this, filename, mode);  // throws if the open failed; no peek check for output
+    }
+}; // class ofstream
+
+class fstream  // std::fstream whose open() validates the mode, the open itself and the first peek()
+    : public std::fstream
+{
+public:
+    fstream() = default;
+    fstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in)
+    {
+        open(filename, mode);  // may throw strict_fstream::Exception
+    }
+    void open(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in)
+    {
+        if (! (mode & std::ios_base::out)) mode |= std::ios_base::in;  // default to reading unless writing was requested
+        exceptions(std::ios_base::badbit);  // from now on badbit raises std::ios_base::failure
+        detail::static_method_holder::check_mode(filename, mode);  // reject contradictory flags before touching the file
+        std::fstream::open(filename, mode);
+        detail::static_method_holder::check_open(this, filename, mode);  // throws if the open failed
+        detail::static_method_holder::check_peek(this, filename, mode);  // NOTE(review): also runs for write-only modes - confirm that is intended
+    }
+}; // class fstream
+
+} // namespace strict_fstream
+
+#endif
diff --git a/src/3rd_party/zstr/zstr.hpp b/src/3rd_party/zstr/zstr.hpp
new file mode 100644
index 00000000..6b633728
--- /dev/null
+++ b/src/3rd_party/zstr/zstr.hpp
@@ -0,0 +1,411 @@
+//---------------------------------------------------------
+// Copyright 2015 Ontario Institute for Cancer Research
+// Written by Matei David (matei@cs.toronto.edu)
+//---------------------------------------------------------
+
+// Reference:
+// http://stackoverflow.com/questions/14086417/how-to-write-custom-input-stream-in-c
+
+#ifndef __ZSTR_HPP
+#define __ZSTR_HPP
+
+#include <cassert>
+#include <fstream>
+#include <sstream>
+#include <zlib.h>
+#include "strict_fstream.hpp"
+
+namespace zstr
+{
+
+/// Exception thrown by failed zlib operations; what() is "zlib: <code name>: <zlib message, if any>".
+class Exception
+    : public std::exception
+{
+public:
+    Exception(z_stream * zstrm_p, int ret)  // ret: zlib return code; zstrm_p: stream whose msg field is appended
+        : _msg("zlib: ")
+    {
+        switch (ret)
+        {
+        case Z_STREAM_ERROR:
+            _msg += "Z_STREAM_ERROR: ";
+            break;
+        case Z_DATA_ERROR:
+            _msg += "Z_DATA_ERROR: ";
+            break;
+        case Z_MEM_ERROR:
+            _msg += "Z_MEM_ERROR: ";
+            break;
+        case Z_VERSION_ERROR:
+            _msg += "Z_VERSION_ERROR: ";
+            break;
+        case Z_BUF_ERROR:
+            _msg += "Z_BUF_ERROR: ";
+            break;
+        default:  // any other code is printed numerically
+            std::ostringstream oss;
+            oss << ret;
+            _msg += "[" + oss.str() + "]: ";
+            break;
+        }
+        if (zstrm_p->msg) _msg += zstrm_p->msg;  // zlib leaves msg NULL when it has no text; appending a null char* is undefined behaviour
+    }
+    Exception(const std::string msg) : _msg(msg) {}  // use a caller-provided message verbatim
+    const char * what() const noexcept { return _msg.c_str(); }
+private:
+    std::string _msg;
+}; // class Exception
+
+namespace detail
+{
+
+class z_stream_wrapper  // z_stream that is initialised in the constructor and ended in the destructor
+    : public z_stream
+{
+public:
+    z_stream_wrapper(bool _is_input = true, int _level = Z_DEFAULT_COMPRESSION)  // _is_input: inflate (true) or deflate (false); _level is used for deflate only
+        : is_input(_is_input)
+    {
+        this->zalloc = Z_NULL;
+        this->zfree = Z_NULL;
+        this->opaque = Z_NULL;
+        int ret;
+        if (is_input)
+        {
+            this->avail_in = 0;
+            this->next_in = Z_NULL;
+            ret = inflateInit2(this, 15+32);  // windowBits 15, +32 per the zlib manual: auto-detect zlib or gzip header
+        }
+        else
+        {
+            ret = deflateInit2(this, _level, Z_DEFLATED, 15+16, 8, Z_DEFAULT_STRATEGY);  // windowBits 15, +16 per the zlib manual: write a gzip wrapper
+        }
+        if (ret != Z_OK) throw Exception(this, ret);  // NOTE(review): msg is not set before the init call - confirm zlib always assigns it on failure
+    }
+    ~z_stream_wrapper()
+    {
+        if (is_input)
+        {
+            inflateEnd(this);
+        }
+        else
+        {
+            deflateEnd(this);
+        }
+    }
+private:
+    bool is_input;  // remembers which *End() function matches the init call
+}; // class z_stream_wrapper
+
+} // namespace detail
+
+class istreambuf  // read-only streambuf that inflates (or passes through unchanged) the bytes pulled from *sbuf_p
+    : public std::streambuf
+{
+public:
+    istreambuf(std::streambuf * _sbuf_p,
+               std::size_t _buff_size = default_buff_size, bool _auto_detect = true)  // _buff_size: size of each of the two internal buffers
+        : sbuf_p(_sbuf_p),
+          zstrm_p(nullptr),
+          buff_size(_buff_size),
+          auto_detect(_auto_detect),
+          auto_detect_run(false),
+          is_text(false)
+    {
+        assert(sbuf_p);
+        in_buff = new char [buff_size];
+        in_buff_start = in_buff;
+        in_buff_end = in_buff;
+        out_buff = new char [buff_size];
+        setg(out_buff, out_buff, out_buff);  // empty get area: the first read triggers underflow()
+    }
+
+    istreambuf(const istreambuf &) = delete;
+    istreambuf(istreambuf &&) = default;  // NOTE(review): members are raw owning pointers, so a defaulted move leaves two objects deleting the same buffers - confirm it is never used
+    istreambuf & operator = (const istreambuf &) = delete;
+    istreambuf & operator = (istreambuf &&) = default;
+
+    virtual ~istreambuf()
+    {
+        delete [] in_buff;
+        delete [] out_buff;
+        if (zstrm_p) delete zstrm_p;
+    }
+
+    virtual std::streambuf::int_type underflow()  // refills out_buff when the get area is exhausted; returns the next char or eof
+    {
+        if (this->gptr() == this->egptr())
+        {
+            // start of the free region in the output buffer (nothing produced yet)
+            char * out_buff_free_start = out_buff;
+            do
+            {
+                // read more input if none available
+                if (in_buff_start == in_buff_end)
+                {
+                    // empty input buffer: refill from the start
+                    in_buff_start = in_buff;
+                    std::streamsize sz = sbuf_p->sgetn(in_buff, buff_size);
+                    in_buff_end = in_buff + sz;
+                    if (in_buff_end == in_buff_start) break; // end of input
+                }
+                // auto detect (once, on the first input bytes) if the stream contains text or deflate data
+                if (auto_detect && ! auto_detect_run)
+                {
+                    auto_detect_run = true;
+                    unsigned char b0 = *reinterpret_cast< unsigned char * >(in_buff_start);
+                    unsigned char b1 = *reinterpret_cast< unsigned char * >(in_buff_start + 1);  // NOTE(review): read before the two-byte length check below; only used if that check passes
+                    // Ref:
+                    // http://en.wikipedia.org/wiki/Gzip
+                    // http://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like
+                    is_text = ! (in_buff_start + 2 <= in_buff_end
+                                 && ((b0 == 0x1F && b1 == 0x8B)         // gzip header
+                                     || (b0 == 0x78 && (b1 == 0x01      // zlib header
+                                                        || b1 == 0x9C
+                                                        || b1 == 0xDA))));
+                }
+                if (is_text)
+                {
+                    // pass-through: swap in_buff and out_buff so the bytes just read become the output, and adjust pointers
+                    assert(in_buff_start == in_buff);
+                    std::swap(in_buff, out_buff);
+                    out_buff_free_start = in_buff_end;  // still points into the old input buffer, which is now out_buff
+                    in_buff_start = in_buff;
+                    in_buff_end = in_buff;
+                }
+                else
+                {
+                    // run inflate() on input; the inflator is created on first use
+                    if (! zstrm_p) zstrm_p = new detail::z_stream_wrapper(true);
+                    zstrm_p->next_in = reinterpret_cast< decltype(zstrm_p->next_in) >(in_buff_start);
+                    zstrm_p->avail_in = in_buff_end - in_buff_start;
+                    zstrm_p->next_out = reinterpret_cast< decltype(zstrm_p->next_out) >(out_buff_free_start);
+                    zstrm_p->avail_out = (out_buff + buff_size) - out_buff_free_start;
+                    int ret = inflate(zstrm_p, Z_NO_FLUSH);
+                    // process return code: anything other than Z_OK / Z_STREAM_END is an error
+                    if (ret != Z_OK && ret != Z_STREAM_END) throw Exception(zstrm_p, ret);
+                    // update in&out pointers following inflate()
+                    in_buff_start = reinterpret_cast< decltype(in_buff_start) >(zstrm_p->next_in);
+                    in_buff_end = in_buff_start + zstrm_p->avail_in;
+                    out_buff_free_start = reinterpret_cast< decltype(out_buff_free_start) >(zstrm_p->next_out);
+                    assert(out_buff_free_start + zstrm_p->avail_out == out_buff + buff_size);
+                    // if stream ended, deallocate inflator; a new one is created if more input follows
+                    if (ret == Z_STREAM_END)
+                    {
+                        delete zstrm_p;
+                        zstrm_p = nullptr;
+                    }
+                }
+            } while (out_buff_free_start == out_buff);  // loop until some output was produced
+            // 2 exit conditions:
+            // - end of input: there might or might not be output available
+            // - out_buff_free_start != out_buff: output available
+            this->setg(out_buff, out_buff, out_buff_free_start);
+        }
+        return this->gptr() == this->egptr()
+            ? traits_type::eof()
+            : traits_type::to_int_type(*this->gptr());
+    }
+private:
+    std::streambuf * sbuf_p;  // source of raw bytes; never deleted by this class
+    char * in_buff;  // raw input buffer (owned)
+    char * in_buff_start;  // [in_buff_start, in_buff_end) is the unconsumed input
+    char * in_buff_end;
+    char * out_buff;  // buffer backing the get area (owned)
+    detail::z_stream_wrapper * zstrm_p;  // inflator; nullptr until needed and after each stream end
+    std::size_t buff_size;
+    bool auto_detect;  // whether to sniff the header at all
+    bool auto_detect_run;  // true once the sniff has been done
+    bool is_text;  // true: pass input through without inflating
+
+    static const std::size_t default_buff_size = (std::size_t)1 << 20;  // 1 MiB
+}; // class istreambuf
+
+class ostreambuf  // write-only streambuf that deflates everything written to it and forwards the result to *sbuf_p
+    : public std::streambuf
+{
+public:
+    ostreambuf(std::streambuf * _sbuf_p,
+               std::size_t _buff_size = default_buff_size, int _level = Z_DEFAULT_COMPRESSION)  // _buff_size: size of each internal buffer; _level: zlib compression level
+        : sbuf_p(_sbuf_p),
+          zstrm_p(new detail::z_stream_wrapper(false, _level)),
+          buff_size(_buff_size)
+    {
+        assert(sbuf_p);
+        in_buff = new char [buff_size];
+        out_buff = new char [buff_size];
+        setp(in_buff, in_buff + buff_size);  // the whole of in_buff is the put area
+    }
+
+    ostreambuf(const ostreambuf &) = delete;
+    ostreambuf(ostreambuf &&) = default;  // NOTE(review): members are raw owning pointers, so a defaulted move leaves two objects deleting the same buffers - confirm it is never used
+    ostreambuf & operator = (const ostreambuf &) = delete;
+    ostreambuf & operator = (ostreambuf &&) = default;
+
+    int deflate_loop(int flush)  // runs deflate() with the given flush mode until it stops producing output; 0 on success, -1 if the sink rejected data; throws on zlib errors
+    {
+        while (true)
+        {
+            zstrm_p->next_out = reinterpret_cast< decltype(zstrm_p->next_out) >(out_buff);
+            zstrm_p->avail_out = buff_size;
+            int ret = deflate(zstrm_p, flush);
+            if (ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR) throw Exception(zstrm_p, ret);
+            std::streamsize sz = sbuf_p->sputn(out_buff, reinterpret_cast< decltype(out_buff) >(zstrm_p->next_out) - out_buff);
+            if (sz != reinterpret_cast< decltype(out_buff) >(zstrm_p->next_out) - out_buff)
+            {
+                // there was an error in the sink stream
+                return -1;
+            }
+            if (ret == Z_STREAM_END || ret == Z_BUF_ERROR || sz == 0)
+            {
+                break;
+            }
+        }
+        return 0;
+    }
+
+    virtual ~ostreambuf()
+    {
+        // flush the zlib stream
+        //
+        // NOTE: Errors here are ignored, because we cannot throw in a destructor:
+        // a non-zero sync() return value is dropped and any exception raised by
+        // deflate_loop() or the sink is swallowed instead of reaching std::terminate.
+        // This mirrors the behaviour of std::basic_filebuf::~basic_filebuf().
+        // To see errors, flush the stream explicitly before it is destroyed.
+        //
+        try { sync(); } catch (...) {}
+        delete [] in_buff;
+        delete [] out_buff;
+        delete zstrm_p;
+    }
+    virtual std::streambuf::int_type overflow(std::streambuf::int_type c = traits_type::eof())  // compresses the put area, then stores c (if not eof) in the emptied buffer
+    {
+        zstrm_p->next_in = reinterpret_cast< decltype(zstrm_p->next_in) >(pbase());
+        zstrm_p->avail_in = pptr() - pbase();
+        while (zstrm_p->avail_in > 0)
+        {
+            int r = deflate_loop(Z_NO_FLUSH);
+            if (r != 0)
+            {
+                setp(nullptr, nullptr);  // null put area marks the failure; sync() checks pptr()
+                return traits_type::eof();
+            }
+        }
+        setp(in_buff, in_buff + buff_size);
+        return traits_type::eq_int_type(c, traits_type::eof()) ? traits_type::eof() : sputc(c);
+    }
+    virtual int sync()  // compresses pending input and finishes the current zlib stream; 0 on success, -1 on sink failure
+    {
+        // first, call overflow to clear in_buff
+        overflow();
+        if (! pptr()) return -1;
+        // then, call deflate asking to finish the zlib stream
+        zstrm_p->next_in = nullptr;
+        zstrm_p->avail_in = 0;
+        if (deflate_loop(Z_FINISH) != 0) return -1;
+        deflateReset(zstrm_p);  // later writes start a fresh compressed stream
+        return 0;
+    }
+private:
+    std::streambuf * sbuf_p;  // sink for compressed bytes; never deleted by this class
+    char * in_buff;  // put area holding uncompressed data (owned)
+    char * out_buff;  // scratch buffer for deflate() output (owned)
+    detail::z_stream_wrapper * zstrm_p;  // deflator (owned)
+    std::size_t buff_size;
+
+    static const std::size_t default_buff_size = (std::size_t)1 << 20;  // 1 MiB
+}; // class ostreambuf
+
+/// Decompressing input stream layered over a streambuf that is owned elsewhere.
+class istream
+    : public std::istream
+{
+public:
+    // Read through the buffer of an existing stream by delegating to the streambuf constructor.
+    istream(std::istream & is)
+        : istream(is.rdbuf())
+    {}
+    explicit istream(std::streambuf * sbuf_p)
+        : std::istream(new istreambuf(sbuf_p))
+    {
+        this->exceptions(std::ios_base::badbit);  // let errors propagate as exceptions
+    }
+    virtual ~istream()
+    {
+        delete this->rdbuf();  // the istreambuf allocated by the constructor
+    }
+}; // class istream
+
+/// Compressing output stream layered over a streambuf that is owned elsewhere.
+class ostream
+    : public std::ostream
+{
+public:
+    // Write through the buffer of an existing stream by delegating to the streambuf constructor.
+    ostream(std::ostream & os)
+        : ostream(os.rdbuf())
+    {}
+    explicit ostream(std::streambuf * sbuf_p)
+        : std::ostream(new ostreambuf(sbuf_p))
+    {
+        this->exceptions(std::ios_base::badbit);  // let errors propagate as exceptions
+    }
+    virtual ~ostream()
+    {
+        delete this->rdbuf();  // the ostreambuf allocated by the constructor
+    }
+}; // class ostream
+
+namespace detail
+{
+
+template < typename FStream_Type >  // FStream_Type: one of the strict_fstream stream classes
+struct strict_fstream_holder  // holds the file stream as a member so that a class deriving from this first can hand _fs.rdbuf() to a later base
+{
+    strict_fstream_holder(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in)
+        : _fs(filename, mode)  // opens the file right away
+    {}
+    FStream_Type _fs;
+}; // class strict_fstream_holder
+
+} // namespace detail
+
+class ifstream  // opens a file and exposes its decompressed (or passed-through) contents as a std::istream
+    : private detail::strict_fstream_holder< strict_fstream::ifstream >,
+      public std::istream
+{
+public:
+    explicit ifstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in)  // binary is always added, as in zstr::ofstream, so compressed bytes are never altered by text-mode translation
+        : detail::strict_fstream_holder< strict_fstream::ifstream >(filename, mode | std::ios_base::binary),
+          std::istream(new istreambuf(_fs.rdbuf()))
+    {
+        exceptions(std::ios_base::badbit);  // let errors propagate as exceptions
+    }
+    virtual ~ifstream()
+    {
+        if (rdbuf()) delete rdbuf();  // the istreambuf allocated by the constructor
+    }
+}; // class ifstream
+
+class ofstream  // opens a file and compresses everything written through the std::ostream interface into it
+    : private detail::strict_fstream_holder< strict_fstream::ofstream >,
+      public std::ostream
+{
+public:
+    explicit ofstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::out)  // binary is always added to the requested mode
+        : detail::strict_fstream_holder< strict_fstream::ofstream >(filename, mode | std::ios_base::binary),
+          std::ostream(new ostreambuf(_fs.rdbuf()))
+    {
+        exceptions(std::ios_base::badbit);  // let errors propagate as exceptions
+    }
+    virtual ~ofstream()
+    {
+        if (rdbuf()) delete rdbuf();  // the ostreambuf allocated by the constructor; its destructor flushes
+    }
+}; // class ofstream
+
+} // namespace zstr
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 913ab17d..09864161 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -83,7 +83,9 @@ add_library(marian STATIC
 
   $<TARGET_OBJECTS:libyaml-cpp>
   $<TARGET_OBJECTS:SQLiteCpp>
+  $<TARGET_OBJECTS:pathie-cpp>
 )
+target_compile_options(marian PUBLIC ${ALL_WARNINGS})  # NOTE(review): PUBLIC also passes these flags to every target that links marian - confirm that is intended, otherwise use PRIVATE
 
 # Generate git_revision.h to reflect current git revision information
 # [https://stackoverflow.com/questions/1435953/how-can-i-pass-git-sha1-to-compiler-as-definition-using-cmake]
@@ -110,6 +112,8 @@ cuda_add_library(marian_cuda
   training/gradient_dropping/gpu/dropper.cu
   training/gradient_dropping/gpu/sparse_algorithm.cu
   STATIC)
+  
+  target_compile_options(marian_cuda PUBLIC ${ALL_WARNINGS})  # NOTE(review): PUBLIC also passes these flags to every target that links marian_cuda - confirm that is intended
 endif(CUDA_FOUND)
 
 set_target_properties(marian PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
@@ -117,18 +121,23 @@ set_target_properties(marian PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY
 
 add_executable(marian_train command/marian_main.cpp)
 set_target_properties(marian_train PROPERTIES OUTPUT_NAME marian)
+target_compile_options(marian_train PRIVATE ${ALL_WARNINGS})  # PRIVATE: warning flags are a build detail of the executable itself
 
 add_executable(marian_decoder command/marian_decoder.cpp)
 set_target_properties(marian_decoder PROPERTIES OUTPUT_NAME marian-decoder)
+target_compile_options(marian_decoder PRIVATE ${ALL_WARNINGS})
 
 add_executable(marian_scorer command/marian_scorer.cpp)
 set_target_properties(marian_scorer PROPERTIES OUTPUT_NAME marian-scorer)
+target_compile_options(marian_scorer PRIVATE ${ALL_WARNINGS})
 
 add_executable(marian_vocab command/marian_vocab.cpp)
 set_target_properties(marian_vocab PROPERTIES OUTPUT_NAME marian-vocab)
+target_compile_options(marian_vocab PRIVATE ${ALL_WARNINGS})
 
 add_executable(marian_conv command/marian_conv.cpp)
 set_target_properties(marian_conv PROPERTIES OUTPUT_NAME marian-conv)
+target_compile_options(marian_conv PRIVATE ${ALL_WARNINGS})
 
 set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_scorer marian_vocab marian_conv)
 
@@ -165,6 +174,7 @@ endif()
 if(COMPILE_SERVER)
   add_executable(marian_server command/marian_server.cpp)
   set_target_properties(marian_server PROPERTIES OUTPUT_NAME marian-server)
+  target_compile_options(marian_server PRIVATE ${ALL_WARNINGS})  # PRIVATE: warning flags are a build detail of the executable itself
   set(EXECUTABLES ${EXECUTABLES} marian_server)
 endif(COMPILE_SERVER)
 
diff --git a/src/command/marian_vocab.cpp b/src/command/marian_vocab.cpp
index d53dc5f0..de8ef3c7 100755
--- a/src/command/marian_vocab.cpp
+++ b/src/command/marian_vocab.cpp
@@ -25,9 +25,7 @@ int main(int argc, char** argv) {
   LOG(info, "Creating vocabulary...");
 
   auto vocab = New<Vocab>(options, 0);
-  io::InputFileStream corpusStrm(std::cin);
-  io::OutputFileStream vocabStrm(std::cout);
-  vocab->create(corpusStrm, vocabStrm, options->get<size_t>("max-size"));
+  vocab->create("stdout", "stdin", options->get<size_t>("max-size"));
 
   LOG(info, "Finished");
 
diff --git a/src/common/cli_wrapper.cpp b/src/common/cli_wrapper.cpp
index 0e230c04..28826bb2 100755
--- a/src/common/cli_wrapper.cpp
+++ b/src/common/cli_wrapper.cpp
@@ -1,6 +1,8 @@
 #include "common/cli_wrapper.h"
+#include "common/cli_helper.h"
 #include "common/logging.h"
 #include "common/options.h"
+#include "common/timer.h"
 #include "common/version.h"
 
 namespace marian {
@@ -85,8 +87,7 @@ CLIWrapper::CLIWrapper(YAML::Node &config,
   app_->formatter(fmt);
 
   // add --version option
-  optVersion_
-      = app_->add_flag("--version", "Print the version number and exit");
+  optVersion_ = app_->add_flag("--version", "Print the version number and exit");
   optVersion_->group(defaultGroup_);
 }
 
@@ -96,20 +97,12 @@ CLIWrapper::CLIWrapper(Ptr<marian::Options> options,
                        const std::string &footer,
                        size_t columnWidth,
                        size_t screenWidth)
-    : CLIWrapper(options->getYaml(),
-                 description,
-                 header,
-                 footer,
-                 columnWidth,
-                 screenWidth) {}
+    : CLIWrapper(options->getYaml(), description, header, footer, columnWidth, screenWidth) {}
 
 CLIWrapper::~CLIWrapper() {}
 
 void CLIWrapper::switchGroup(const std::string &name) {
-  if(name.empty())
-    currentGroup_ = defaultGroup_;
-  else
-    currentGroup_ = name;
+  currentGroup_ = name.empty() ? defaultGroup_ : name;
 }
 
 void CLIWrapper::parse(int argc, char **argv) {
@@ -126,25 +119,77 @@ void CLIWrapper::parse(int argc, char **argv) {
   }
 }
 
-std::string CLIWrapper::failureMessage(const CLI::App *app,
-                                       const CLI::Error &e) {
+std::string CLIWrapper::failureMessage(const CLI::App *app, const CLI::Error &e) {
   std::string header = "Error: " + std::string(e.what()) + "\n";
   if(app->get_help_ptr() != nullptr)
-    header += "Run with " + app->get_help_ptr()->get_name()
-              + " for more information.\n";
+    header += "Run with " + app->get_help_ptr()->get_name() + " for more information.\n";
   return header;
 }
 
-void CLIWrapper::overwriteDefault(const YAML::Node &node) {
-  // iterate requested default values
-  for(auto it : node) {
+bool CLIWrapper::updateConfig(const YAML::Node &config) {
+  bool success = true;  // becomes false as soon as one key is not a registered option
+  auto cmdOptions = getParsedOptionNames();
+  for(auto it : config) {
     auto key = it.first.as<std::string>();
-    ABORT_IF(!allVars_.count(key), "The following option was not expected: '{}'", key);
-    // if we have an option but it was not specified on command-line
-    if(allVars_.count(key) > 0 && opts_.at(key)->empty()) {
+    // options given on the command line take precedence over the config: leave them untouched
+    if(cmdOptions.count(key))
+      continue;
+    if(options_.count(key)) {
       config_[key] = YAML::Clone(it.second);
+      options_[key].modified = true;  // remembered so that dumpConfig(true) can skip untouched options
+    } else {
+      success = false;  // unknown key: reported through the return value, the remaining keys are still applied
     }
   }
+  return success;
+}
+
+std::string CLIWrapper::dumpConfig(bool skipDefault /*= false*/) const {
+  YAML::Emitter out;
+  out << YAML::Comment("Marian configuration file generated at " + timer::currentDate()
+                       + " with version " + buildVersion());
+  out << YAML::BeginMap;
+  std::string comment;  // group name of the most recently emitted option
+  for(const auto &key : getOrderedOptionNames()) {
+    // skip keys that have been removed from config_
+    if(!config_[key])
+      continue;
+    if(skipDefault && !options_.at(key).modified)  // optionally leave out options not flagged as modified
+      continue;
+    auto group = options_.at(key).opt->get_group();
+    if(comment != group) {  // a new group starts: emit its name as a YAML comment
+      if(!comment.empty())
+        out << YAML::Newline;  // separates consecutive groups; not emitted before the first one
+      comment = group;
+      out << YAML::Comment(group);
+    }
+    out << YAML::Key;
+    out << key;
+    out << YAML::Value;
+    cli::OutputYaml(config_[key], out);
+  }
+  out << YAML::EndMap;
+  return out.c_str();  // copied into the returned std::string
+}
+
+std::unordered_set<std::string> CLIWrapper::getParsedOptionNames() const {
+  std::unordered_set<std::string> parsed;  // names of the options whose CLI::Option is not empty()
+  for(auto entry = options_.begin(); entry != options_.end(); ++entry) {
+    if(!entry->second.opt->empty()) parsed.insert(entry->first);
+  }
+  return parsed;
+}
+
+std::vector<std::string> CLIWrapper::getOrderedOptionNames() const {
+  std::vector<std::string> keys;
+  keys.reserve(options_.size());  // one name per registered option
+  for(const auto &entry : options_)
+    keys.push_back(entry.first);
+  // order names by creation index; std::sort is qualified so the call does not depend on ADL
+  std::sort(keys.begin(), keys.end(), [this](const std::string &a, const std::string &b) {
+    return options_.at(a).idx < options_.at(b).idx;
+  });
+  return keys;
 }
 
 }  // namespace cli
diff --git a/src/common/cli_wrapper.h b/src/common/cli_wrapper.h
index 67f2dff4..cf47a310 100755
--- a/src/common/cli_wrapper.h
+++ b/src/common/cli_wrapper.h
@@ -8,6 +8,7 @@
 #include <iostream>
 #include <map>
 #include <string>
+#include <unordered_set>
 
 namespace marian {
 
@@ -46,6 +47,19 @@ private:
   size_t screenWidth_{0};
 };
 
+// @TODO: in this file review the use of naked pointers. We use Ptr<Type> anywhere else,
+// what's up with that?
+
+/**
+ * Helper structure storing an option object, the associated variable, the creation index, and a flag marking options whose value was updated.
+ */
+struct CLIOptionTuple {
+  CLI::Option *opt{nullptr};  // non-owning; initialised like the other members so a default-constructed tuple holds no indeterminate pointer
+  Ptr<any_type> var;
+  size_t idx{0};
+  bool modified{false};
+};
+
 /**
  * @brief The class used to define and parse command-line arguments.
  *
@@ -63,10 +77,10 @@ private:
  */
 class CLIWrapper {
 private:
-  // [option name] -> option value
-  std::map<std::string, Ptr<any_type>> allVars_;
-  // Map with option names and objects
-  std::map<std::string, CLI::Option *> opts_;
+  // Map with option names and option tuples
+  std::unordered_map<std::string, CLIOptionTuple> options_;
+  // Counter for created options
+  size_t counter_{0};
   // Command-line argument parser
   Ptr<CLI::App> app_;
 
@@ -75,23 +89,22 @@ private:
   // Name of the current option group
   std::string currentGroup_{""};
 
-  // If this is a wrapper then this should just be a reference,
-  // then we do not have the added level of containment.
+  // Reference to the main config object
   YAML::Node &config_;
 
   // Option for --version flag. This is a special flag and similarly to --help,
   // the key "version" will be not added into the YAML config
-  CLI::Option* optVersion_;
+  CLI::Option *optVersion_;
 
   static std::string failureMessage(const CLI::App *app, const CLI::Error &e);
 
-  // Extract an option name from comma-separated list of command-line arguments,
-  // e.g. 'help' from '--help,-h'
+  // Extract option name from a comma-separated list of long and short options, e.g. 'help' from
+  // '--help,-h'
   std::string keyName(const std::string &args) const {
     // re-use existing functions from CLI11 to keep option names consistent
-    return std::get<1>(CLI::detail::get_names(CLI::detail::split_names(
-                           args)))  // get long names only
-        .front();                   // get first long name
+    return std::get<1>(
+               CLI::detail::get_names(CLI::detail::split_names(args)))  // get long names only
+        .front();                                                       // get first long name
   }
 
 public:
@@ -112,23 +125,14 @@ public:
              const std::string &description = "",
              const std::string &header = "General options",
              const std::string &footer = "",
-             size_t columnWidth = 35,
+             size_t columnWidth = 40,
              size_t screenWidth = 0);
 
   /**
    * @brief Create an instance of the command-line argument parser,
    * short-cuft for Options object.
    *
-   * Option --help, -h is automatically added.
-   *
-   * @param options A smart pointer to the Options object containing the
-   *  to-be-wrapped yaml tree
-   * @param description Program description
-   * @param header Header text for the main option group
-   * @param footer Text displayed after the list of options
-   * @param columnWidth Width of the column with option names
-   * @param screenWidth Maximum allowed width for help messages, 0 means no
-   *  limit
+   * @see Other constructor
    */
   CLIWrapper(Ptr<Options> options,
              const std::string &description = "",
@@ -201,8 +205,7 @@ public:
    * have a default value or be non-defaulted
    */
   template <typename T>
-  CLI::Option *add_nondefault(const std::string &args,
-                              const std::string &help) {
+  CLI::Option *add_nondefault(const std::string &args, const std::string &help) {
     return add_option<T>(keyName(args),
                          args,
                          help,
@@ -212,8 +215,7 @@ public:
   }
 
   /**
-   * Switch to different option group or to the default group if
-   * argument is empty.
+   * Switch to different option group or to the default group if argument is empty.
    *
    * @param name Header of the option group
    */
@@ -222,23 +224,31 @@ public:
   // Parse command-line arguments. Handles --help and --version options
   void parse(int argc, char **argv);
 
-  /**
+  /*
    * @brief Overwrite values for unparsed options
    *
-   * Default values are overwritten with the options found in the config
-   * provided as the argument, while parsed command-line options remain
-   * unchanged
+   * Default values are overwritten with the options from the config provided, while parsed
+   * command-line options remain unchanged.
+   * This should be the preferred way of updating config options, as the class keeps track of
+   * the options whose values have changed.
    *
    * @param node YAML config with new default values for options
    */
-  void overwriteDefault(const YAML::Node &node);
+  bool updateConfig(const YAML::Node &config);
+
+  // Get textual YAML representation of the config
+  std::string dumpConfig(bool skipDefault = false) const;
 
 private:
-  template <
-      typename T,
-      // options with numeric and string-like values
-      CLI::enable_if_t<!CLI::is_bool<T>::value && !CLI::is_vector<T>::value,
-                       CLI::detail::enabler> = CLI::detail::dummy>
+  // Get names of options passed via command-line
+  std::unordered_set<std::string> getParsedOptionNames() const;
+  // Get option names in the same order as they are created
+  std::vector<std::string> getOrderedOptionNames() const;
+
+  template <typename T,
+            // options with numeric and string-like values
+            CLI::enable_if_t<!CLI::is_bool<T>::value && !CLI::is_vector<T>::value,
+                             CLI::detail::enabler> = CLI::detail::dummy>
   CLI::Option *add_option(const std::string &key,
                           const std::string &args,
                           const std::string &help,
@@ -248,13 +258,17 @@ private:
     // define YAML entry if requested
     if(addToConfig)
       config_[key] = val;
-    // create variable for the option
-    allVars_.insert(std::make_pair(key, std::make_shared<any_type>(val)));
+
+    // create option tuple
+    CLIOptionTuple option;
+    option.idx = counter_++;
+    option.var = std::make_shared<any_type>(val);
 
     // callback function collecting a command-line argument
     CLI::callback_t fun = [this, key](CLI::results_t res) {
+      options_[key].modified = true;
       // get variable associated with the option
-      auto &var = allVars_[key]->as<T>();
+      auto &var = options_[key].var->as<T>();
       // store parser result in var
       auto ret = CLI::detail::lexical_cast(res[0], var);
       // update YAML entry
@@ -275,15 +289,15 @@ private:
       opt->default_str(ss.str());
     }
 
-    // store option object
-    opts_.insert(std::make_pair(key, opt));
-    return opts_[key];
+    // store option tuple
+    option.opt = opt;
+    options_.insert(std::make_pair(key, option));
+    return options_[key].opt;
   }
 
   template <typename T,
             // options with vector values
-            CLI::enable_if_t<CLI::is_vector<T>::value,
-                             CLI::detail::enabler> = CLI::detail::dummy>
+            CLI::enable_if_t<CLI::is_vector<T>::value, CLI::detail::enabler> = CLI::detail::dummy>
   CLI::Option *add_option(const std::string &key,
                           const std::string &args,
                           const std::string &help,
@@ -293,13 +307,17 @@ private:
     // define YAML entry if requested
     if(addToConfig)
       config_[key] = val;
-    // create variable for the option
-    allVars_.insert(std::make_pair(key, std::make_shared<any_type>(val)));
+
+    // create option tuple
+    CLIOptionTuple option;
+    option.idx = counter_++;
+    option.var = std::make_shared<any_type>(val);
 
     // callback function collecting command-line arguments
     CLI::callback_t fun = [this, key](CLI::results_t res) {
+      options_[key].modified = true;
       // get vector variable associated with the option
-      auto &vec = allVars_[key]->as<T>();
+      auto &vec = options_[key].var->as<T>();
       vec.clear();
       bool ret = true;
       // handle '[]' as an empty vector
@@ -330,15 +348,15 @@ private:
     if(defaulted)
       opt->default_str(CLI::detail::join(val));
 
-    // store option object
-    opts_.insert(std::make_pair(key, opt));
-    return opts_[key];
+    // store option tuple
+    option.opt = opt;
+    options_.insert(std::make_pair(key, option));
+    return options_[key].opt;
   }
 
   template <typename T,
             // options with boolean values, called flags in CLI11
-            CLI::enable_if_t<CLI::is_bool<T>::value,
-                             CLI::detail::enabler> = CLI::detail::dummy>
+            CLI::enable_if_t<CLI::is_bool<T>::value, CLI::detail::enabler> = CLI::detail::dummy>
   CLI::Option *add_option(const std::string &key,
                           const std::string &args,
                           const std::string &help,
@@ -348,19 +366,23 @@ private:
     // define YAML entry if requested
     if(addToConfig)
       config_[key] = val;
-    // create variable for the option
-    allVars_.insert(std::make_pair(key, std::make_shared<any_type>(val)));
+
+    // create option tuple
+    CLIOptionTuple option;
+    option.idx = counter_++;
+    option.var = std::make_shared<any_type>(val);
 
     // callback function setting the flag
     CLI::callback_t fun = [this, key](CLI::results_t res) {
+      options_[key].modified = true;
       // get parser result, it is safe as boolean options have an implicit value
       auto val = res[0];
       auto ret = true;
       if(val == "true" || val == "on" || val == "yes" || val == "1") {
-        allVars_[key]->as<T>() = true;
+        options_[key].var->as<T>() = true;
         config_[key] = true;
       } else if(val == "false" || val == "off" || val == "no" || val == "0") {
-        allVars_[key]->as<T>() = false;
+        options_[key].var->as<T>() = false;
         config_[key] = false;
       } else {
         ret = false;
@@ -378,9 +400,10 @@ private:
     // allow to use the flag without any argument
     opt->implicit_val("true");
 
-    // store option object
-    opts_.insert(std::make_pair(key, opt));
-    return opts_[key];
+    // store option tuple
+    option.opt = opt;
+    options_.insert(std::make_pair(key, option));
+    return options_[key].opt;
   }
 };
 
diff --git a/src/common/config.cpp b/src/common/config.cpp
index c5209008..e5208b0d 100755
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -38,7 +38,7 @@ void Config::initialize(int argc, char** argv, cli::mode mode, bool validate) {
     std::string quote; // attempt to quote special chars
     if (arg.empty() || arg.find_first_of(" #`\"'\\${}|&^?*!()%><") != std::string::npos)
       quote = "'";
-    arg = regex::regex_replace(arg, std::regex("'"), "'\\''");
+    arg = regex::regex_replace(arg, regex::regex("'"), "'\\''");
     if (!cmdLine.empty())
       cmdLine.push_back(' ');
     cmdLine += quote + arg + quote;
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp
index e7676b01..539579a1 100755
--- a/src/common/config_parser.cpp
+++ b/src/common/config_parser.cpp
@@ -77,8 +77,9 @@ void ConfigParser::addOptionsGeneral(cli::CLIWrapper& cli) {
      "allow the use of environment variables in paths, of the form ${VAR_NAME}");
   cli.add<bool>("--relative-paths",
      "All paths are relative to the config file location");
-  cli.add<bool>("--dump-config",
-     "Dump current (modified) configuration to stdout and exit");
+  cli.add_nondefault<std::string>("--dump-config",
+     "Dump current (modified) configuration to stdout and exit. Possible values: full, minimal")
+    ->implicit_val("full");
   // clang-format on
 }
 
@@ -255,8 +256,13 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
       "If these files do not exist they are created");
 #ifdef USE_SENTENCEPIECE
   cli.add<std::vector<float>>("--sentencepiece-alphas",
-                              "Sampling factors for SentencePieceVocab;"
-                              "i-th factor corresponds to i-th vocabulary");
+      "Sampling factors for SentencePiece vocabulary; i-th factor corresponds to i-th vocabulary");
+  cli.add<std::string>("--sentencepiece-options",
+      "Pass-through command-line options to SentencePiece trainer");
+  cli.add<size_t>("--sentencepiece-max-lines",
+      "Maximum lines to train SentencePiece vocabulary, selected with sampling from all data. "
+      "When set to 0 all lines are going to be used.",
+      10000000);
 #endif
   // scheduling options
   cli.add<size_t>("--after-epochs,-e",
@@ -356,10 +362,10 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
      "none");
   cli.add<std::string>("--guided-alignment-cost",
      "Cost type for guided alignment: ce (cross-entropy), mse (mean square error), mult (multiplication)",
-     "ce");
+     "mse");
   cli.add<double>("--guided-alignment-weight",
      "Weight for guided alignment cost",
-     1);
+     0.1);
   cli.add_nondefault<std::string>("--data-weighting",
      "Path to a file with sentence or word weights");
   cli.add<std::string>("--data-weighting-type",
@@ -396,8 +402,8 @@ void ConfigParser::addOptionsValidation(cli::CLIWrapper& cli) {
       "Validate model every  arg  updates (append 't' for every  arg  target labels)",
       "10000u");
   cli.add<std::vector<std::string>>("--valid-metrics",
-      "Metric to use during validation: cross-entropy, perplexity, valid-script, translation."
-      " Multiple metrics can be specified",
+      "Metric to use during validation: cross-entropy, ce-mean-words, perplexity, valid-script, "
+      " translation, bleu, bleu-detok. Multiple metrics can be specified",
       std::vector<std::string>({"cross-entropy"}));
   cli.add<size_t>("--early-stopping",
      "Stop if the first validation metric does not improve for  arg  consecutive validation steps",
@@ -452,7 +458,7 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) {
       "Paths to input file(s), stdin by default",
       std::vector<std::string>({"stdin"}));
   cli.add<std::string>("--output,-o",
-      "Paths to output file(s), stdout by default",
+      "Path to output file, stdout by default",
       "stdout");
   cli.add<std::vector<std::string>>("--vocabs,-v",
       "Paths to vocabulary files have to correspond to --input");
@@ -511,6 +517,9 @@ void ConfigParser::addOptionsScoring(cli::CLIWrapper& cli) {
   // TODO: move options like vocabs and train-sets to a separate procedure as they are defined twice
   cli.add<std::vector<std::string>>("--train-sets,-t",
       "Paths to corpora to be scored: source target");
+  cli.add<std::string>("--output,-o",
+      "Path to output file, stdout by default",
+      "stdout");
   cli.add<std::vector<std::string>>("--vocabs,-v",
       "Paths to vocabulary files have to correspond to --train-sets."
       " If this parameter is not supplied we look for vocabulary files source.{yml,json} and target.{yml,json}."
@@ -519,6 +528,8 @@ void ConfigParser::addOptionsScoring(cli::CLIWrapper& cli) {
       "Score n-best list instead of plain text corpus");
   cli.add<std::string>("--n-best-feature",
       "Feature name to be inserted into n-best list", "Score");
+  cli.add<bool>("--normalize,-n",
+      "Divide translation score by translation length");
   cli.add_nondefault<std::string>("--summary",
       "Only print total cost, possible values: cross-entropy (ce-mean), ce-mean-words, ce-sum, perplexity")
       ->implicit_val("cross-entropy");
@@ -607,8 +618,40 @@ void ConfigParser::addSuboptionsInputLength(cli::CLIWrapper& cli) {
   // clang-format on
 }
 
+void ConfigParser::addSuboptionsULR(cli::CLIWrapper& cli) {
+  // clang-format off
+  // Registers the options of the universal encoder (ULR), see https://arxiv.org/pdf/1802.05368.pdf
+  cli.add<bool>("--ulr",
+      "Enable ULR (Universal Language Representation)",
+      false);
+  // Pre-trained universal embeddings are read from files for multi-source setups.
+  // Note that "source" and "target" here are relative to ULR, not to the translation languages.
+  // Queries (EQ in Fig. 2): the unified embeddings projected into one space.
+  cli.add<std::string>("--ulr-query-vectors",
+      "Path to file with universal sources embeddings from projection into universal space",
+      "");
+  // Keys (EK in Fig. 2): the keys of the target embeddings projected into the unified space
+  // (i.e. ENU in the multi-lingual case)
+  cli.add<std::string>("--ulr-keys-vectors",
+      "Path to file with universal sources embeddings of traget keys from projection into universal space",  // NOTE(review): "traget" is a typo for "target" in the help text
+      "");
+  cli.add<bool>("--ulr-trainable-transformation",
+      "Make Query Transformation Matrix A trainable",
+      false);
+  cli.add<int>("--ulr-dim-emb",
+      "ULR monolingual embeddings dimension");
+  cli.add<float>("--ulr-dropout",
+      "ULR dropout on embeddings attentions. Default is no dropout",
+      0.0f);
+  cli.add<float>("--ulr-softmax-temperature",
+      "ULR softmax temperature to control randomness of predictions. Deafult is 1.0: no temperature",  // NOTE(review): "Deafult" is a typo for "Default" in the help text
+      1.0f);
+  // clang-format on
+}
+
 void ConfigParser::expandAliases(cli::CLIWrapper& cli) {
   YAML::Node config;
+  // The order of aliases does matter as later options overwrite earlier
 
   if(config_["best-deep"].as<bool>()) {
     config["layer-normalization"] = true;
@@ -622,11 +665,10 @@ void ConfigParser::expandAliases(cli::CLIWrapper& cli) {
     config["skip"] = true;
   }
 
-  // @TODO: Quite sure CLIWrapper should not do that;
-  // that's semantics that seem to belong into the current class
-  // and has not really anything to do with CLI proper.
-  if(config)
-    cli.overwriteDefault(config);
+  if(config) {
+    auto success = cli.updateConfig(config);
+    ABORT_IF(!success, "Unknown option(s) in aliases, check if aliases consist of correct options");
+  }
 }
 
 void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
@@ -661,8 +703,8 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
   auto configPaths = findConfigPaths();
   if(!configPaths.empty()) {
     auto config = loadConfigFiles(configPaths);
-    // combine loaded options with the main config object
-    cli.overwriteDefault(config);
+    auto success = cli.updateConfig(config);
+    ABORT_IF(!success, "There are option(s) in a config file that are not expected");
   }
 
   if(get<bool>("interpolate-env-vars")) {
@@ -677,11 +719,10 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
   // remove extra config files from the config to avoid redundancy
   config_.remove("config");
 
-  if(get<bool>("dump-config")) {
+  if(has("dump-config")) {
+    bool skipDefault = get<std::string>("dump-config") == "minimal";
     config_.remove("dump-config");
-    YAML::Emitter emit;
-    cli::OutputYaml(config_, emit);
-    std::cout << emit.c_str() << std::endl;
+    std::cout << cli.dumpConfig(skipDefault) << std::endl;
     exit(0);
   }
 
@@ -714,8 +755,7 @@ std::vector<std::string> ConfigParser::findConfigPaths() {
   return paths;
 }
 
-YAML::Node ConfigParser::loadConfigFiles(
-    const std::vector<std::string>& paths) {
+YAML::Node ConfigParser::loadConfigFiles(const std::vector<std::string>& paths) {
   YAML::Node configAll;
 
   for(auto& path : paths) {
@@ -750,33 +790,4 @@ YAML::Node ConfigParser::loadConfigFiles(
 YAML::Node ConfigParser::getConfig() const {
   return config_;
 }
-
-void ConfigParser::addSuboptionsULR(cli::CLIWrapper& cli) {
-  // support for universal encoder ULR https://arxiv.org/pdf/1802.05368.pdf
-  cli.add<bool>("--ulr",
-      "Is ULR (Universal Language Representation) enabled?",
-      false);
-  // reading pre-trained universal embedings for multi-sources
-  // note that source and target here is relative to ULR not the translation  langs
-  //queries: EQ in Fig2 :  is the unified embbedins projected to one space.
-  //"Path to file with universal sources embeddings from projection into universal space")
-  cli.add<std::string>("--ulr-query-vectors",
-      "Path to file with universal sources embeddings from projection into universal space",
-      "");
-  //keys: EK in Fig2 :  is the keys of the target  embbedins projected to unified  space (i.e. ENU in multi-lingual case)
-  cli.add<std::string>("--ulr-keys-vectors",
-      "Path to file with universal sources embeddings of traget keys from projection into universal space",
-      "");
-  cli.add<bool>("--ulr-trainable-transformation",
-      "Is Query Transformation Matrix A trainable ?",
-      false);
-  cli.add<int>("--ulr-dim-emb",
-      "ULR mono embed dim");
-  cli.add<float>("--ulr-dropout",
-      "ULR dropout on embeddings attentions: default is no dropuout",
-      0.0f);
-  cli.add<float>("--ulr-softmax-temperature",
-      "ULR softmax temperature to control randomness of predictions- deafult is 1.0: no temperature ",
-      1.0f);
-}
 }  // namespace marian
diff --git a/src/common/config_parser.h b/src/common/config_parser.h
index 80f7e81c..de1cb70e 100755
--- a/src/common/config_parser.h
+++ b/src/common/config_parser.h
@@ -63,7 +63,7 @@ private:
   // Abort if not set.
   template <typename T>
   T get(const std::string& key) const {
-    ABORT_IF(!has(key), "CLI object has no key {}", key);
+    ABORT_IF(!has(key), "CLI object has no key '{}'", key);
     return config_[key].as<T>();
   }
 
diff --git a/src/common/config_validator.cpp b/src/common/config_validator.cpp
index 625748e9..5086c726 100755
--- a/src/common/config_validator.cpp
+++ b/src/common/config_validator.cpp
@@ -84,9 +84,6 @@ void ConfigValidator::validateOptionsTraining() const {
   ABORT_IF(!modelDir.empty() && !filesystem::isDirectory(modelDir),
            "Model directory does not exist");
 
-  ABORT_IF(!modelDir.empty() && !filesystem::canWrite(modelDir),
-           "No write permission in model directory");
-
   ABORT_IF(
       has("valid-sets") && get<std::vector<std::string>>("valid-sets").size() != trainSets.size(),
       "There should be as many validation sets as training sets");
diff --git a/src/common/file_stream.h b/src/common/file_stream.h
index caa12a6c..87cb7f9a 100755
--- a/src/common/file_stream.h
+++ b/src/common/file_stream.h
@@ -1,38 +1,24 @@
 #pragma once
 
-// @TODO: this file still contains lots of stuff from boost::filesystem and boost::iostreams,
-// this has to be figured out.
-
 #include "common/filesystem.h"
 #include "common/logging.h"
 #include "common/definitions.h"
 
-#include <boost/filesystem/fstream.hpp>
-#include <boost/iostreams/device/file_descriptor.hpp>
-#pragma warning(push)
-#pragma warning(disable: 4458) // declaration of 'traits_type' hides class member
-#pragma warning(disable: 4456) // declaration of 'c' hides previous local declaration
-#pragma warning(disable: 4244) // conversion from 'int' to 'char', possible loss of data
-#pragma warning(disable: 4706) // assignment within conditional expression
-#include <boost/iostreams/filter/gzip.hpp>
-#pragma warning(pop)
-#ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wsuggest-override"
-#endif
-#include <boost/iostreams/filtering_stream.hpp>
-#ifdef __GNUC__
+#include "3rd_party/zstr/zstr.hpp"
 #pragma GCC diagnostic pop
-#endif
+
+#include <boost/iostreams/device/file_descriptor.hpp>
+#include <boost/iostreams/stream_buffer.hpp>
+
 #include <iostream>
 #include <memory>
 
 #ifdef _MSC_VER
-
 #include <fcntl.h>
 #include <io.h>
 #include <stdlib.h>
-
 #endif
 
 namespace marian {
@@ -40,7 +26,7 @@ namespace io {
 
 class TemporaryFile {
 private:
-  int fd_;
+  int fd_{-1};
   bool unlink_;
   std::string name_;
 
@@ -145,79 +131,78 @@ public:
 
 class InputFileStream {
 public:
-  InputFileStream(const std::string& file) : file_(file), ifstream_(file_.getBoost()) {
-    ABORT_IF(
-        !marian::filesystem::exists(file_), "File '{}' does not exist", file);
-
-    if(file_.extension() == marian::filesystem::Path(std::string(".gz")))
-      istream_.push(boost::iostreams::gzip_decompressor());
-    istream_.push(ifstream_);
+  InputFileStream(const std::string& file)
+  : file_(file) {
+    ABORT_IF(!marian::filesystem::exists(file_), "File '{}' does not exist", file);
+
+    if(file_.extension() == marian::filesystem::Path(".gz"))
+      // @TODO: consider make_unique for next refactoring
+      istream_.reset(new zstr::ifstream(file_.string()));
+    else
+      istream_.reset(new std::ifstream(file_.string()));
   }
 
   InputFileStream(TemporaryFile& tempfile)
       : fds_(tempfile.getFileDescriptor(), boost::iostreams::never_close_handle) {
     lseek(tempfile.getFileDescriptor(), 0, SEEK_SET);
-    istream_.push(fds_, 1024);
+
+    namespace bio = boost::iostreams;
+    fdsBuffer_.reset(new bio::stream_buffer<bio::file_descriptor_source>(fds_));
+    istream_.reset(new std::istream(fdsBuffer_.get()));
   }
 
-  InputFileStream(std::istream& strm) { istream_.push(strm, 0); }
+  InputFileStream(std::istream& strm)
+  : istream_(new std::istream(strm.rdbuf())) {}
 
-  operator std::istream&() { return istream_; }
+  operator std::istream&() { return *istream_; }
 
-  operator bool() { return (bool)istream_; }
+  operator bool() { return (bool)*istream_; }
 
   bool bad() const {
-    return istream_.bad();
+    return istream_->bad();
   }
 
   bool fail() const {
-    return istream_.fail();
+    return istream_->fail();
   }
 
   char widen(char c) {
-    return istream_.widen(c);
-  }
-
-  bool isOpen() const {
-    return ifstream_.is_open();
+    return istream_->widen(c);
   }
 
   std::string path() { return file_.string(); }
 
-  bool empty() { return ifstream_.peek() == std::ifstream::traits_type::eof(); }
+  bool empty() { return istream_->peek() == std::ifstream::traits_type::eof(); }
 
   void setbufsize(size_t size) const {
-    ifstream_.rdbuf()->pubsetbuf(0, 0);
-    //readBuf_ = std::make_unique<char[]>(size);
+    istream_->rdbuf()->pubsetbuf(0, 0);  // (0, 0) requests unbuffered mode on a std::filebuf
     readBuf_.reset(new char[size]);
-    ifstream_.rdbuf()->pubsetbuf(readBuf_.get(), 0);
+    istream_->rdbuf()->pubsetbuf(readBuf_.get(), (std::streamsize)size);  // real length, not 0
   }
 
   template <typename T>
   friend InputFileStream& operator>>(InputFileStream& stream, T& t) {
-    stream.istream_ >> t;
+    *stream.istream_ >> t;
     // bad() seems to be correct here. Should not abort on EOF.
-    ABORT_IF(stream.bad(),
-             "Error reading from file '{}'",
-             stream.path());
+    ABORT_IF(stream.bad(), "Error reading from file '{}'", stream.path());
     return stream;
   }
 
   template <typename T>
   size_t read(T* ptr, size_t num = 1) {
-    istream_.read((char*)ptr, num * sizeof(T));
+    istream_->read((char*)ptr, num * sizeof(T));
     // fail() seems to be correct here. Failure to read should abort.
-    ABORT_IF(fail(),
-             "Error reading from file '{}'",
-             path());
+    ABORT_IF(fail(), "Error reading from file '{}'", path());
     return num * sizeof(T);
   }
 
 private:
   marian::filesystem::Path file_;
-  boost::filesystem::ifstream ifstream_;
+  std::unique_ptr<std::istream> istream_;
+
   boost::iostreams::file_descriptor_source fds_;
-  boost::iostreams::filtering_istream istream_;
+  std::unique_ptr<boost::iostreams::stream_buffer<boost::iostreams::file_descriptor_source>> fdsBuffer_;
+
   mutable UPtr<char[]> readBuf_; // for setbuf()
 };
 
@@ -226,9 +211,7 @@ private:
 static inline InputFileStream& getline(InputFileStream& in, std::string& line) {
   std::getline((std::istream&)in, line);
   // bad() seems to be correct here. Should not abort on EOF.
-  ABORT_IF(in.bad(),
-           "Error reading from file '{}'",
-           in.path());
+  ABORT_IF(in.bad(), "Error reading from file '{}'", in.path());
   // strip terminal CR if present
   if(in && !line.empty() && line.back() == in.widen('\r'))
     line.pop_back();
@@ -240,9 +223,7 @@ static inline InputFileStream& getline(InputFileStream& in, std::string& line) {
 static inline InputFileStream& getline(InputFileStream& in, std::string& line, char delim) {
   std::getline((std::istream&)in, line, delim);
   // bad() seems to be correct here. Should not abort on EOF.
-  ABORT_IF(in.bad(),
-           "Error reading from file '{}'",
-           in.path());
+  ABORT_IF(in.bad(), "Error reading from file '{}'", in.path());
   // strip terminal CR if present
   if(in && !line.empty() && line.back() == in.widen('\r'))
     line.pop_back();
@@ -251,62 +232,61 @@ static inline InputFileStream& getline(InputFileStream& in, std::string& line, c
 
 class OutputFileStream {
 public:
-  OutputFileStream(const std::string& file) : file_(file), ofstream_(file_.getBoost()) {
-    ABORT_IF(
-        !marian::filesystem::exists(file_), "File '{}' does not exist", file);
+  OutputFileStream(const std::string& file) : file_(file) {
+    if(file_.extension() == marian::filesystem::Path(".gz"))
+      ostream_.reset(new zstr::ofstream(file_.string()));
+    else
+      ostream_.reset(new std::ofstream(file_.string()));
 
-    if(file_.extension() == marian::filesystem::Path(std::string(".gz")))
-      ostream_.push(boost::iostreams::gzip_compressor());
-    ostream_.push(ofstream_);
+    ABORT_IF(!marian::filesystem::exists(file_), "File '{}' could not be opened", file);
   }
 
   OutputFileStream(TemporaryFile& tempfile)
       : fds_(tempfile.getFileDescriptor(), boost::iostreams::never_close_handle) {
     lseek(tempfile.getFileDescriptor(), 0, SEEK_SET);
-    ostream_.push(fds_, 1024);
+
+    namespace bio = boost::iostreams;
+    fdsBuffer_.reset(new bio::stream_buffer<bio::file_descriptor_sink>(fds_));
+    ostream_.reset(new std::ostream(fdsBuffer_.get()));
   }
 
-  OutputFileStream(std::ostream& strm) { ostream_.push(strm, 0); }
+  OutputFileStream(std::ostream& strm) {
+    ostream_.reset(new std::ostream(strm.rdbuf()));
+  }
 
-  operator std::ostream&() { return ostream_; }
+  operator std::ostream&() { return *ostream_; }
 
-  operator bool() { return (bool)ostream_; }
+  operator bool() { return (bool)*ostream_; }
 
   bool bad() const {
-    return ostream_.bad();
+    return ostream_->bad();
   }
 
   bool fail() const {
-    return ostream_.fail();
+    return ostream_->fail();
   }
 
   template <typename T>
   friend OutputFileStream& operator<<(OutputFileStream& stream, const T& t) {
-    stream.ostream_ << t;
+    *stream.ostream_ << t;
     // fail() seems to be correct here. Failure to write should abort.
-    ABORT_IF(stream.fail(),
-             "Error writing to file '{}'",
-             stream.path());
+    ABORT_IF(stream.fail(), "Error writing to file '{}'", stream.path());
     return stream;
   }
 
   // handle things like std::endl which is actually a function not a value
   friend OutputFileStream& operator<<(OutputFileStream& stream, std::ostream& (*var)(std::ostream&)) {
-    stream.ostream_ << var;
+    *stream.ostream_ << var;
     // fail() seems to be correct here. Failure to write should abort.
-    ABORT_IF(stream.fail(),
-             "Error writing to file '{}'",
-             stream.path());
+    ABORT_IF(stream.fail(), "Error writing to file '{}'", stream.path());
     return stream;
   }
 
   template <typename T>
   size_t write(const T* ptr, size_t num = 1) {
-    ostream_.write((char*)ptr, num * sizeof(T));
+    ostream_->write((char*)ptr, num * sizeof(T));
     // fail() seems to be correct here. Failure to write should abort.
-    ABORT_IF(fail(),
-             "Error writing to file '{}'",
-             path());
+    ABORT_IF(fail(), "Error writing to file '{}'", path());
     return num * sizeof(T);
   }
 
@@ -314,9 +294,10 @@ public:
 
 private:
   marian::filesystem::Path file_;
-  boost::filesystem::ofstream ofstream_;
+  std::unique_ptr<std::ostream> ostream_;
+
   boost::iostreams::file_descriptor_sink fds_;
-  boost::iostreams::filtering_ostream ostream_;
+  std::unique_ptr<boost::iostreams::stream_buffer<boost::iostreams::file_descriptor_sink>> fdsBuffer_;
 };
 
 }
diff --git a/src/common/filesystem.h b/src/common/filesystem.h
index 66927313..9dd0ae55 100755
--- a/src/common/filesystem.h
+++ b/src/common/filesystem.h
@@ -1,66 +1,57 @@
 #pragma once
 
-// @TODO: This is a temporary file to move every function from boost::filesystem used in Marian
-// into one place. Marian should call functions only from this file. boost::filesystem will
-// be removed. This needs to be portable to Windows too.
+// This is a shallow wrapper around a filesystem path library.
+// We used this to wrap boost::filesystem, now we are wrapping
+// Pathie, a small open source lib.
 
+// @TODO: go back to canonical names for functions and objects
+// as specified in C++17 so it becomes easy to move in the future
 
-#ifdef __GNUC__
 #pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-variable"
 #pragma GCC diagnostic ignored "-Wsuggest-override"
-#endif
-#include <boost/filesystem.hpp>
-#ifdef __GNUC__
-// "ignored -Wunused-variable" above ignores 'static const' declarations (where 'static'
-// is not needed). We work around by referencing the offending variables in dummy code.
-static inline void boost_dummy_filesystem() { boost::system::posix_category; boost::system::errno_ecat; boost::system::native_ecat; }
+#include "3rd_party/pathie-cpp/include/path.hpp"
+#include "3rd_party/pathie-cpp/include/errors.hpp"
 #pragma GCC diagnostic pop
-#endif
 
 namespace marian {
 namespace filesystem {
 
-  struct Path {
+  class Path {
     private:
-      boost::filesystem::path path;
+      Pathie::Path path;
 
     public:
       Path() {}
       Path(const Path& p) : path{p.path} {}
       Path(const std::string& s) : path{s} {}
-      Path(const boost::filesystem::path& p) : path{p} {}
+      Path(const Pathie::Path& p) : path{p} {}
 
       Path parentPath() const {
-        return Path{path.parent_path()};
+        return Path(path.parent());
       }
 
       Path filename() const {
-        return Path{path.filename()};
+        return Path(path.basename());
       }
 
       Path extension() const {
-        return Path{path.extension()};
+        return Path(path.extension());
       }
 
       bool empty() const {
-        return path.empty();
+        return path.str().empty();
       }
 
-      const boost::filesystem::path& getBoost() const {
+      const Pathie::Path& getImpl() const {
         return path;
       }
 
-      operator std::string&() {
-        return (std::string&)path;
-      }
-
       operator std::string() const {
-        return path.string();
+        return path.str();
       }
 
       std::string string() const {
-        return path.string();
+        return path.str();
       }
 
       bool operator==(const Path& p) const {
@@ -73,35 +64,31 @@ namespace filesystem {
   };
 
   static inline Path currentPath() {
-    return Path{boost::filesystem::current_path()};
+    return Path(Pathie::Path::pwd());
   }
 
-  static inline Path canonical(const Path& p, const Path& dir) {
-    return Path{ boost::filesystem::canonical(p.getBoost(), dir.getBoost()) };
+  static inline Path canonical(const Path& p, const Path& base) {
+    // make p absolute relative to base, then expand() it; NOTE(review): presumably no symlink resolution as in boost canonical() - confirm
+    return p.getImpl().absolute(base.getImpl()).expand();
   }
 
   static inline bool exists(const Path& p) {
-    return boost::filesystem::exists(p.getBoost());
+    return p.getImpl().exists();
   }
 
   static inline size_t fileSize(const Path& p) {
-    return boost::filesystem::file_size(p.getBoost());
+    return p.getImpl().size();
   }
 
   static inline bool isDirectory(const Path& p) {
-    return boost::filesystem::is_directory(p.getBoost());
-  }
-
-  static inline bool canWrite(const Path& p) {
-    return (boost::filesystem::status(p.getBoost()).permissions() & boost::filesystem::owner_write) != 0;
+    return p.getImpl().is_directory();
   }
 
-  // concatenation?
   static inline Path operator/ (const Path& lhs, const Path& rhs) {
-    return lhs.getBoost() / rhs.getBoost();
+    return Path(lhs.getImpl() / rhs.getImpl());
   }
 
-  using FilesystemError = boost::filesystem::filesystem_error;
+  using FilesystemError = Pathie::PathieError;
 
 }
 }
 \ No newline at end of file
diff --git a/src/common/logging.cpp b/src/common/logging.cpp
index fdb999ca..0170d633 100755
--- a/src/common/logging.cpp
+++ b/src/common/logging.cpp
@@ -150,9 +150,11 @@ void switchtoMultinodeLogging(std::string nodeIdStr) {
 
 
 namespace marian {
-  void noinline logCallStack(size_t skipLevels)
-  {
-    auto callStack = ::Microsoft::MSR::CNTK::DebugUtil::GetCallStack(skipLevels + 2, /*makeFunctionNamesStandOut=*/true);
-    checkedLog("general", "critical", "Call stack:{}", callStack);
+  std::string noinline getCallStack(size_t skipLevels) {
+    return ::Microsoft::MSR::CNTK::DebugUtil::GetCallStack(skipLevels + 2, /*makeFunctionNamesStandOut=*/true);
+  }
+
+  void noinline logCallStack(size_t skipLevels) {
+    checkedLog("general", "critical", getCallStack(skipLevels));
   }
 }
diff --git a/src/common/logging.h b/src/common/logging.h
index 091044ea..cdaa806c 100755
--- a/src/common/logging.h
+++ b/src/common/logging.h
@@ -6,6 +6,7 @@
 
 namespace marian {
   void logCallStack(size_t skipLevels);
+  std::string getCallStack(size_t skipLevels);
 }
 
 /**
@@ -46,13 +47,19 @@ namespace marian {
  *
  * @param ... Message text and variables
  */
-#define ABORT(...)                                                      \
-  do {                                                                  \
-    checkedLog("general", "critical", __VA_ARGS__);                     \
-    ::marian::logCallStack(/*skipLevels=*/0);                           \
-    std::cerr << "Aborted from " << FUNCTION_NAME << " in " << __FILE__ \
-              << ": " << __LINE__ << std::endl;                         \
-    std::abort();                                                       \
+#define ABORT(...)                                                             \
+  do {                                                                         \
+    auto logger = spdlog::get("general");                                      \
+    if(logger == nullptr)                                                      \
+      logger = createStderrLogger("general", "[%Y-%m-%d %T] Error: %v");       \
+    else                                                                       \
+      logger->set_pattern("[%Y-%m-%d %T] Error: %v");                          \
+    checkedLog("general", "critical", __VA_ARGS__);                            \
+    checkedLog("general", "critical", "Aborted from {} in {}:{}",              \
+               FUNCTION_NAME, __FILE__, __LINE__);                             \
+    logger->set_pattern("%v");                                                 \
+    checkedLog("general", "critical", marian::getCallStack(/*skipLevels=*/0)); \
+    std::abort();                                                              \
   } while(0)
 
 /**
@@ -85,18 +92,6 @@ template <class... Args>
 void checkedLog(std::string logger, std::string level, Args... args) {
   Logger log = spdlog::get(logger);
   if(!log) {
-    if(level == "critical") {
-      // log and errlog are not the same, hence we need to check
-      // if an error logger exists first and not try to create a
-      // second one. Otherwise this will throw an exception.
-      Logger errlog = spdlog::get("error");
-      if(!errlog)
-        errlog = createStderrLogger("error", "Error: %v - aborting");
-      errlog->critical(args...);
-    }
-    // @TODO: should other loggers do something? This seems to be
-    // a sink state when logs are not intialized. Critical errors
-    // should log nevertheless, non-critical go unreported.
     return;
   }
 
diff --git a/src/common/timer.h b/src/common/timer.h
index 6f86b54f..4172cfc7 100755
--- a/src/common/timer.h
+++ b/src/common/timer.h
@@ -12,6 +12,14 @@
 namespace marian {
 namespace timer {
 
+// Helper function to get the current date and time
+static std::string currentDate() {
+  std::time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
+  char date[100] = {0};
+  std::strftime(date, sizeof(date), "%F %X %z", std::localtime(&now));
+  return date;
+}
+
 // Timer measures elapsed time.
 // This is a wrapper around std::chrono providing wall time only
 class Timer {
diff --git a/src/common/version.cpp b/src/common/version.cpp
index a31c7df7..75814d92 100755
--- a/src/common/version.cpp
+++ b/src/common/version.cpp
@@ -1,12 +1,10 @@
 #include "common/version.h"
-#include "common/project_version.h" // cmake-generated file, major/minor/tweak versions
-#include "common/git_revision.h"    // make-generated file, contains git commit info
+#include "common/git_revision.h"     // make-generated file, contains git commit info
+#include "common/project_version.h"  // cmake-generated file, major/minor/tweak versions
 
 namespace marian {
 
-std::string buildVersion()
-{
+std::string buildVersion() {
   return std::string(PROJECT_VERSION) + " " + GIT_REVISION;
 }
-
 }
diff --git a/src/common/version.h b/src/common/version.h
index a0c8ab22..a425af93 100755
--- a/src/common/version.h
+++ b/src/common/version.h
@@ -3,5 +3,5 @@
 #include <string>
 
 namespace marian {
-    std::string buildVersion();
+  std::string buildVersion();
 }
diff --git a/src/data/corpus.cpp b/src/data/corpus.cpp
index 70660467..7a7a846e 100755
--- a/src/data/corpus.cpp
+++ b/src/data/corpus.cpp
@@ -106,9 +106,12 @@ void Corpus::shuffleData(const std::vector<std::string>& paths) {
 
   size_t numStreams = paths.size();
 
+  size_t numSentences;
   std::vector<std::vector<std::string>> corpus(numStreams); // [stream][id]
-  if (!corpusInRAM_.empty()) // when caching, we use what we have instead
+  if (!corpusInRAM_.empty()) { // when caching, we use what we have instead
     corpus = std::move(corpusInRAM_); // temporarily move ownership here, will be moved back
+    numSentences = corpus[0].size();
+  }
   else {
     files_.resize(numStreams);
     for(size_t i = 0; i < numStreams; ++i) {
@@ -132,10 +135,9 @@ void Corpus::shuffleData(const std::vector<std::string>& paths) {
       ABORT_IF(eofsHit != 0, "Not all input files have the same number of lines");
     }
     files_.clear();
-    LOG(info, "[data] Done reading {} sentences.", corpus[0].size());
+    numSentences = corpus[0].size();
+    LOG(info, "[data] Done reading {} sentences", numSentences);
   }
-  size_t numSentences = corpus[0].size();
-  LOG(info, "[data] Done reading {} sentences", numSentences);
 
   // randomize sequence ids, and remember them
   ids_.resize(numSentences);
diff --git a/src/data/corpus_base.cpp b/src/data/corpus_base.cpp
index 880a6946..c9704313 100755
--- a/src/data/corpus_base.cpp
+++ b/src/data/corpus_base.cpp
@@ -75,10 +75,14 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
       if(maxVocabs.size() < paths_.size())
         maxVocabs.resize(paths_.size(), 0);
 
+      LOG(info, "No vocabulary files given, trying to find or build based on training data. "
+                "Vocabularies will be built separately for each file.");
+
       // Create vocabs if not provided
       for(size_t i = 0; i < paths_.size(); ++i) {
         Ptr<Vocab> vocab = New<Vocab>(options_, i);
-        int vocSize = vocab->loadOrCreate("", paths_[i], maxVocabs[i]);
+        std::vector<std::string> trainPaths = { paths_[i] };
+        int vocSize = vocab->loadOrCreate("", trainPaths, maxVocabs[i]);
         // TODO: this is not nice as it modifies the option object and needs to expose the changes
         // outside the corpus as models need to know about the vocabulary size; extract the vocab
         // creation functionality from the class.
@@ -92,9 +96,31 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
       if(maxVocabs.size() < vocabPaths.size())
         maxVocabs.resize(paths_.size(), 0);
 
+      // Helper object for grouping training data based on vocabulary file name
+      struct PathsAndSize {
+        std::set<std::string> paths; // contains all paths that are used for training the vocabulary
+        size_t size;                 // contains the maximum vocabulary size
+      };
+      
+      // Group training files based on vocabulary path. If the same
+      // vocab path corresponds to different training files, this means
+      // that a single vocab should combine tokens from all files.
+      std::map<std::string, PathsAndSize> groupVocab;
+      for(size_t i = 0; i < vocabPaths.size(); ++i) {
+        groupVocab[vocabPaths[i]].paths.insert(paths_[i]);
+        if(groupVocab[vocabPaths[i]].size < maxVocabs[i])
+          groupVocab[vocabPaths[i]].size = maxVocabs[i];
+      }
+
       for(size_t i = 0; i < vocabPaths.size(); ++i) {
         Ptr<Vocab> vocab = New<Vocab>(options_, i);
-        int vocSize = vocab->loadOrCreate(vocabPaths[i], paths_[i], maxVocabs[i]);
+
+        // Get the set of files that corresponds to the vocab. If the next file is the same vocab,
+        // it will not be created again, but just correctly loaded.
+        auto pathsAndSize = groupVocab[vocabPaths[i]];
+        std::vector<std::string> groupedPaths(pathsAndSize.paths.begin(), pathsAndSize.paths.end());
+        int vocSize = vocab->loadOrCreate(vocabPaths[i], groupedPaths, pathsAndSize.size);
+        
         // TODO: this is not nice as it modifies the option object and needs to expose the changes
         // outside the corpus as models need to know about the vocabulary size; extract the vocab
         // creation functionality from the class.
diff --git a/src/data/default_vocab.cpp b/src/data/default_vocab.cpp
index 98bf5d8f..1ce055db 100755
--- a/src/data/default_vocab.cpp
+++ b/src/data/default_vocab.cpp
@@ -30,14 +30,16 @@ private:
 
   class VocabFreqOrderer {
   private:
-    std::unordered_map<std::string, size_t>& counter_;
+    const std::unordered_map<std::string, size_t>& counter_;
 
   public:
-    VocabFreqOrderer(std::unordered_map<std::string, size_t>& counter)
-        : counter_(counter) {}
+    VocabFreqOrderer(const std::unordered_map<std::string, size_t>& counter)
+            : counter_(counter) {}
 
+    // order first by decreasing frequency;
+    // if frequencies are the same, order lexicographically by vocabulary string
     bool operator()(const std::string& a, const std::string& b) const {
-      return counter_[a] > counter_[b] || (counter_[a] == counter_[b] && a < b);
+      return counter_.at(a) > counter_.at(b) || (counter_.at(a) == counter_.at(b) && a < b);
     }
   };
 
@@ -117,10 +119,6 @@ public:
       auto str = pair.first;
       auto id = pair.second;
 
-      if(SPEC2SYM.count(str)) {
-        seenSpecial.insert(id);
-      }
-
       // note: this requires ids to be sorted by frequency
       if(!max || id < (Word)max) {
         insertWord(id, str);
@@ -174,8 +172,6 @@ public:
       };
       // @TODO: the hard-att code has not yet been updated to accept EOS at any id
       requireWord(DEFAULT_EOS_ID, DEFAULT_EOS_STR);
-      for(auto id : seenSpecial)
-        requireWord(id, SYM2SPEC.at(id));
     }
 
     return std::max((int)id2str_.size(), max);
@@ -187,52 +183,50 @@ public:
     unkId_ = insertWord(DEFAULT_UNK_ID, DEFAULT_UNK_STR);
   }
 
-  void create(const std::string& vocabPath, const std::string& trainPath) override {
-    LOG(info, "[data] Creating vocabulary {} from {}", vocabPath, trainPath);
-
-    filesystem::Path path(vocabPath);
-    auto dir = path.parentPath();
-    if(dir.empty())
-      dir = filesystem::currentPath();
+  virtual void create(const std::string& vocabPath,
+                      const std::vector<std::string>& trainPaths,
+                      size_t maxSize = 0) override {
 
-    ABORT_IF(!dir.empty() && !filesystem::isDirectory(dir),
-            "Specified vocab directory {} does not exist",
-            dir.string());
+    LOG(info, "[data] Creating vocabulary {} from {}",
+              vocabPath,
+              utils::join(trainPaths, ", "));
 
-    ABORT_IF(!dir.empty() && !filesystem::canWrite(dir),
-            "No write permission in vocab directory {}",
-            dir.string());
+    if(vocabPath != "stdout") {
+      filesystem::Path path(vocabPath);
+      auto dir = path.parentPath();
+      if(dir.empty())
+        dir = filesystem::currentPath();
 
-    ABORT_IF(filesystem::exists(vocabPath),
-            "DefaultVocab file '{}' exists. Not overwriting",
-            path.string());
+      ABORT_IF(!dir.empty() && !filesystem::isDirectory(dir),
+              "Specified vocab directory {} does not exist",
+              dir.string());
 
-    io::InputFileStream trainStrm(trainPath);
-    io::OutputFileStream vocabStrm(vocabPath);
-    create(trainStrm, vocabStrm);
+      ABORT_IF(filesystem::exists(vocabPath),
+              "Vocabulary file '{}' exists. Not overwriting",
+              path.string());
+    }
+    
+    std::unordered_map<std::string, size_t> counter;
+    for(const auto& trainPath : trainPaths)
+      addCounts(counter, trainPath);
+    create(vocabPath, counter, maxSize);
   }
 
-  void create(io::InputFileStream& trainStrm,
-              io::OutputFileStream& vocabStrm,
-              size_t maxSize = 0) override {
-    std::string line;
-    std::unordered_map<std::string, size_t> counter;
+private:
 
-    std::unordered_set<Word> seenSpecial;
+  void addCounts(std::unordered_map<std::string, size_t>& counter,
+                 const std::string& trainPath) {
+    std::unique_ptr<io::InputFileStream> trainStrm(
+      trainPath == "stdin" ? new io::InputFileStream(std::cin)
+                           : new io::InputFileStream(trainPath)
+    );
 
-    while(getline((std::istream&)trainStrm, line)) {
+    std::string line;
+    while(getline(*trainStrm, line)) {
       std::vector<std::string> toks;
-
-      // we do not want any unexpected behavior during creation
-      // e.g. sampling, hence use inference mode
       utils::split(line, toks, " ");
 
       for(const std::string& tok : toks) {
-        if(SPEC2SYM.count(tok)) {
-          seenSpecial.insert(SPEC2SYM.at(tok));
-          continue;
-        }
-
         auto iter = counter.find(tok);
         if(iter == counter.end())
           counter[tok] = 1;
@@ -240,6 +234,11 @@ public:
           iter->second++;
       }
     }
+  }
+
+  void create(const std::string& vocabPath,
+              const std::unordered_map<std::string, size_t>& counter,
+              size_t maxSize = 0) {
 
     std::vector<std::string> vocabVec;
     for(auto& p : counter)
@@ -251,14 +250,7 @@ public:
     vocabYaml.force_insert(DEFAULT_EOS_STR, DEFAULT_EOS_ID);
     vocabYaml.force_insert(DEFAULT_UNK_STR, DEFAULT_UNK_ID);
 
-    for(auto word : seenSpecial)
-      vocabYaml.force_insert(SYM2SPEC.at(word), word);
-
     Word maxSpec = 1;
-    for(auto i : seenSpecial)
-      if(i > maxSpec)
-        maxSpec = i;
-
     auto vocabSize = vocabVec.size();
     if(maxSize > maxSpec)
       vocabSize = std::min(maxSize - maxSpec - 1, vocabVec.size());
@@ -266,10 +258,13 @@ public:
     for(size_t i = 0; i < vocabSize; ++i)
       vocabYaml.force_insert(vocabVec[i], i + maxSpec + 1);
 
-    vocabStrm << vocabYaml;
+    std::unique_ptr<io::OutputFileStream> vocabStrm(
+      vocabPath == "stdout" ? new io::OutputFileStream(std::cout)
+                            : new io::OutputFileStream(vocabPath)
+    );
+    *vocabStrm << vocabYaml;
   }
 
-private:
   Words operator()(const std::vector<std::string>& lineTokens,
                    bool addEOS) const {
     Words words(lineTokens.size());
diff --git a/src/data/sentencepiece_vocab.cpp b/src/data/sentencepiece_vocab.cpp
index d9c24415..ed476ec4 100755
--- a/src/data/sentencepiece_vocab.cpp
+++ b/src/data/sentencepiece_vocab.cpp
@@ -2,13 +2,18 @@
 
 #ifdef USE_SENTENCEPIECE
 #include "sentencepiece/src/sentencepiece_processor.h"
-#endif 
+#include "sentencepiece/src/sentencepiece_trainer.h"
+#endif
 
+#include "common/config.h"
 #include "common/options.h"
 #include "common/logging.h"
 #include "common/filesystem.h"
 #include "common/regex.h"
 
+#include <sstream>
+#include <random>
+
 namespace marian {
 
 #ifdef USE_SENTENCEPIECE
@@ -28,9 +33,85 @@ private:
   Ptr<Options> options_;
   size_t batchIndex_{0};
 
+  std::mt19937 generator_;
+  std::uniform_int_distribution<int> randInt_; // from 0 to INT_MAX
+
+  // Sample from one file, based on first algorithm from:
+  // https://en.wikipedia.org/wiki/Reservoir_sampling
+  void reservoirSampling(std::vector<std::string>& sample, size_t& seenLines,
+                        const std::string& trainPath, size_t maxLines, size_t maxBytes) {
+
+    ABORT_IF(maxLines == 0, "Sample needs to be larger 0");
+
+    std::unique_ptr<io::InputFileStream> trainStrm(
+      trainPath == "stdin" ? new io::InputFileStream(std::cin)
+                           : new io::InputFileStream(trainPath)
+    );
+
+    std::string line;
+    while(getline(*trainStrm, line)) {
+      if(line.size() > 0 && line.size() < maxBytes) {
+        if(sample.size() < maxLines) {
+          sample.push_back(line);
+        }
+        else {
+          size_t i = randInt_(generator_) % (seenLines + 1);
+          if(i < maxLines)
+            sample[i] = line;
+        }
+        seenLines++;
+      }
+    }
+  }
+
+  // Iterate over all input files and collect a representative sample via reservoir sampling.
+  // The sample will first grow to the desired size and next keep sampling with decreasing
+  // probability in the hope to get a uniform sample from the union of all files.
+  size_t reservoirSamplingAll(io::TemporaryFile& temp,
+                             const std::vector<std::string>& trainPaths,
+                             size_t maxLines, size_t maxBytes) {
+    LOG(info, "[SentencePiece] Sampling at most {} lines from {}", maxLines, utils::join(trainPaths, ", "));
+
+    std::vector<std::string> sample;
+    size_t seenLines = 0;
+    for(const auto& trainPath : trainPaths)
+      reservoirSampling(sample, seenLines, trainPath, maxLines, maxBytes);
+    std::shuffle(sample.begin(), sample.end(), generator_);
+
+    io::OutputFileStream out(temp);
+    for(const auto& line : sample)
+        out << line << std::endl;
+
+    LOG(info, "[SentencePiece] Selected {} lines", sample.size());
+    return sample.size();
+  }
+
+  // Just concatenate all files to a temporary file so SentencePiece can consume it.
+  size_t dumpAll(io::TemporaryFile& temp,
+                 const std::vector<std::string>& trainPaths,
+                 size_t maxBytes) {
+    LOG(info, "[SentencePiece] Selecting all lines from {}", utils::join(trainPaths, ", "));
+
+    size_t seenLines = 0;
+    std::string line;
+    io::OutputFileStream out(temp);
+    for(const auto& trainPath : trainPaths) {
+      io::InputFileStream in(trainPath);
+      while(getline(in, line)) {
+        if(line.size() > 0 && line.size() < maxBytes) {
+          out << line << std::endl;
+          seenLines++;
+        }
+      }
+    }
+
+    LOG(info, "[SentencePiece] Selected {} lines", seenLines);
+    return seenLines;
+  }
+
 public:
   SentencePieceVocab(Ptr<Options> options, size_t batchIndex)
-    : options_(options), batchIndex_(batchIndex) {
+    : options_(options), batchIndex_(batchIndex), generator_(Config::seed) {
 
     if(options_->has("sentencepiece-alphas")) {
       auto alphas = options_->get<std::vector<float>>("sentencepiece-alphas");
@@ -41,47 +122,91 @@ public:
 
       if(alpha_ > 0)
         LOG(debug,
-            "Setting SentencePieceVocab sampling factor to {} for input {}",
+            "Setting SentencePiece vocabulary sampling factor to {} for input {}",
             alpha_,
             batchIndex_);
     }
 
   }
 
-  virtual const std::string& canonicalExtension() const { return suffixes_[0]; }
-  virtual const std::vector<std::string>& suffixes() const { return suffixes_; }
+  virtual const std::string& canonicalExtension() const override { return suffixes_[0]; }
+  virtual const std::vector<std::string>& suffixes() const override { return suffixes_; }
 
   virtual std::string suffix() { return suffixes_[0]; };
 
-  virtual std::string type() const { return "SentencePieceVocab"; }
+  virtual std::string type() const override { return "SentencePieceVocab"; }
 
   virtual Word getEosId() const override { return (Word)spm_->eos_id(); }
   virtual Word getUnkId() const override { return (Word)spm_->unk_id(); }
 
-  void create(const std::string& /*vocabPath*/, const std::string& /*trainPath*/) {
-    ABORT("[data] Training of SentencePieceVocab not yet supported");
-  }
+  void create(const std::string& vocabPath,
+              const std::vector<std::string>& trainPaths,
+              size_t maxSize) override {
+
+    size_t defaultMaxSize = 32000;
+    size_t maxLines = options_->get<size_t>("sentencepiece-max-lines");
+    size_t maxBytes = 2048;
+
+    LOG(info, "[SentencePiece] Training SentencePiece vocabulary {}", vocabPath);
 
-  void create(io::InputFileStream& /*trainStrm*/,
-              io::OutputFileStream& /*vocabStrm*/,
-              size_t /*maxSize*/) {
-    ABORT("[data] Training of SentencePieceVocab not yet supported");
+    if(maxSize == 0) {
+      LOG(info, "[SentencePiece] Vocabulary size is undefined (set with --dim-vocabs ...) - setting to {}", defaultMaxSize);
+      maxSize = defaultMaxSize;
+    }
+
+    // Create temporary file to hold the sample for the SentencePiece trainer
+    io::TemporaryFile temp(options_->get<std::string>("tempdir"), false);
+    std::string tempFileName = temp.getFileName();
+    LOG(info, "[SentencePiece] Creating temporary file {}", tempFileName);
+
+    size_t seenLines = 0;
+    if(maxLines == 0)
+      seenLines = dumpAll(temp, trainPaths, maxBytes);
+    else
+      seenLines = reservoirSamplingAll(temp, trainPaths, maxLines, maxBytes);
+
+    // Compose the SentencePiece training command from filenames and parameters
+    std::stringstream command;
+    command
+      << " --bos_id=-1 --eos_id=0 --unk_id=1" // these should not be changed as they match Marian defaults
+      << " --input="               << tempFileName
+      << " --model_prefix="        << vocabPath
+      << " --vocab_size="          << maxSize
+      << " --max_sentence_length=" << maxBytes
+      << " --input_sentence_size=" << seenLines
+      << " " << options_->get<std::string>("sentencepiece-options"); // these are SentencePiece command line options
+
+    // Train the SentencePiece model
+    const auto status = sentencepiece::SentencePieceTrainer::Train(command.str());
+    ABORT_IF(!status.ok(),
+             "SentencePiece vocabulary error: {}",
+             status.ToString());
+
+    LOG(info, "[SentencePiece] Removing {}", vocabPath + ".vocab");
+    ABORT_IF(remove((vocabPath + ".vocab").c_str()) != 0,
+             "Could not remove {}",
+             vocabPath + ".vocab");
+
+    LOG(info, "[SentencePiece] Renaming {} to {}", vocabPath + ".model", vocabPath);
+    ABORT_IF(rename((vocabPath + ".model").c_str(), vocabPath.c_str()) != 0,
+             "Could not rename {} to {}",
+             vocabPath + ".model", vocabPath);
   }
 
-  void createFake() {
-    ABORT("[data] Fake SentencePieceVocab not supported");
+  void createFake() override {
+    ABORT("[SentencePiece] Fake SentencePiece vocabulary not supported");
   }
 
-  Word operator[](const std::string& token) const {
+  Word operator[](const std::string& token) const override {
     return (Word)spm_->PieceToId(token);
   }
 
-  const std::string& operator[](Word id) const {
+  const std::string& operator[](Word id) const override {
     ABORT_IF(id >= size(), "Unknown word id: ", id);
     return spm_->IdToPiece(id);
   }
 
-  Words encode(const std::string& line, bool addEOS, bool inference) const {
+  Words encode(const std::string& line, bool addEOS, bool inference) const override {
     std::vector<int> spmIds;
     if(inference || alpha_ == 0)
       spm_->Encode(line, &spmIds);
@@ -95,7 +220,7 @@ public:
     return words;
   }
 
-  std::string decode(const Words& sentence, bool ignoreEOS) const {
+  std::string decode(const Words& sentence, bool /*ignoreEOS*/) const override {
     std::string line;
     // convert vector of Word to vector of int
     std::vector<int> spmSentence(sentence.begin(), sentence.end());
@@ -103,29 +228,29 @@ public:
     return line;
   }
 
-  size_t size() const {
+  size_t size() const override {
     return spm_->GetPieceSize();
   }
 
-  int load(const std::string& vocabPath, int /*max*/) {
-    LOG(info, "[data] Loading SentencePieceVocab from file {}", vocabPath);
+  int load(const std::string& vocabPath, int /*max*/) override {
+    LOG(info, "[data] Loading SentencePiece vocabulary from file {}", vocabPath);
 
     ABORT_IF(!filesystem::exists(vocabPath),
-            "SentencePieceVocab file {} does not exits",
-            vocabPath);
+             "SentencePiece vocabulary file {} does not exits",
+             vocabPath);
 
     spm_.reset(new sentencepiece::SentencePieceProcessor());
     const auto status = spm_->Load(vocabPath);
 
     ABORT_IF(!status.ok(),
-            "SentencePieceVocab error: {}",
-            status.ToString());
+             "SentencePiece vocabulary error: {}",
+             status.ToString());
 
     return spm_->GetPieceSize();
   }
 
 };
-#endif
+#endif // USE_SENTENCEPIECE
 
 Ptr<VocabBase> createSentencePieceVocab(const std::string& vocabPath, Ptr<Options> options, size_t batchIndex) {
   bool isSentencePiece = regex::regex_search(vocabPath, regex::regex("\\.(spm)$"));
diff --git a/src/data/types.h b/src/data/types.h
index 62566a74..2bda6ece 100644
--- a/src/data/types.h
+++ b/src/data/types.h
@@ -28,27 +28,4 @@ const std::string DEFAULT_UNK_STR = "<unk>";
 const std::string NEMATUS_EOS_STR = "eos";
 const std::string NEMATUS_UNK_STR = "UNK";
 
-const Word STP_ID = 2;
-const Word CPY_ID = 3;
-const Word DEL_ID = 4;
-const Word RPL_ID = 5;
-
-const std::string STP_STR = "<step>";
-const std::string CPY_STR = "<c>";
-const std::string DEL_STR = "<d>";
-const std::string RPL_STR = "<r>";
-
-const std::unordered_map<std::string, Word> SPEC2SYM = {
-    {STP_STR, STP_ID},
-    {CPY_STR, CPY_ID},
-    {DEL_STR, DEL_ID},
-    {RPL_STR, RPL_ID},
-};
-
-const std::unordered_map<Word, std::string> SYM2SPEC = {
-    {STP_ID, STP_STR},
-    {CPY_ID, CPY_STR},
-    {DEL_ID, DEL_STR},
-    {RPL_ID, RPL_STR},
-};
 }  // namespace marian
diff --git a/src/data/vocab.cpp b/src/data/vocab.cpp
index 09849b2e..e95ea721 100755
--- a/src/data/vocab.cpp
+++ b/src/data/vocab.cpp
@@ -13,62 +13,63 @@ Ptr<VocabBase> createVocab(const std::string& vocabPath, Ptr<Options> options, s
 }
 
 int Vocab::loadOrCreate(const std::string& vocabPath,
-                        const std::string& trainPath,
-                        int max) {
+                        const std::vector<std::string>& trainPaths,
+                        size_t maxSize) {
   size_t size = 0;
   if(vocabPath.empty()) {
     // No vocabulary path was given, attempt to first find a vocabulary
-    // for trainPath + possible suffixes. If not found attempt to create
-    // as trainPath + canonical suffix.
+    // for trainPaths[0] + possible suffixes. If not found attempt to create
+    // as trainPaths[0] + canonical suffix.
+    // Only search based on the first path; maybe disable this altogether?
 
     LOG(info,
         "No vocabulary path given; "
         "trying to find default vocabulary based on data path {}",
-        trainPath);
+        trainPaths[0]);
 
     vImpl_ = createDefaultVocab();
-    size = vImpl_->findAndLoad(trainPath, max);
+    size = vImpl_->findAndLoad(trainPaths[0], maxSize);
 
     if(size == 0) {
-      auto path = trainPath + vImpl_->canonicalExtension();
+      auto newVocabPath = trainPaths[0] + vImpl_->canonicalExtension();
       LOG(info,
           "No vocabulary path given; "
-          "trying to find vocabulary based on data path {}",
-          trainPath);
-      vImpl_->create(path, trainPath);
-      size = vImpl_->load(path, max);
+          "trying to create vocabulary based on data paths {}",
+          utils::join(trainPaths, ", "));
+      create(newVocabPath, trainPaths, maxSize);
+      size = load(newVocabPath, maxSize);
     }
   } else {
     if(!filesystem::exists(vocabPath)) {
       // Vocabulary path was given, but no vocabulary present,
       // attempt to create in specified location.
-      create(vocabPath, trainPath);
+      create(vocabPath, trainPaths, maxSize);
     }
     // Vocabulary path exists, attempting to load
-    size = load(vocabPath, max);
+    size = load(vocabPath, maxSize);
   }
   LOG(info, "[data] Setting vocabulary size for input {} to {}", batchIndex_, size);
   return (int)size;
 }
 
-int Vocab::load(const std::string& vocabPath, int max) {
+int Vocab::load(const std::string& vocabPath, size_t maxSize) {
   if(!vImpl_)
     vImpl_ = createVocab(vocabPath, options_, batchIndex_);
-  return vImpl_->load(vocabPath, max);
+  return vImpl_->load(vocabPath, maxSize);
 }
 
-void Vocab::create(const std::string& vocabPath, const std::string& trainPath) {
+void Vocab::create(const std::string& vocabPath,
+                   const std::vector<std::string>& trainPaths,
+                   size_t maxSize) {
   if(!vImpl_)
     vImpl_ = createVocab(vocabPath, options_, batchIndex_);
-  vImpl_->create(vocabPath, trainPath);
+  vImpl_->create(vocabPath, trainPaths, maxSize);
 }
 
-void Vocab::create(io::InputFileStream& trainStrm,
-                   io::OutputFileStream& vocabStrm,
+void Vocab::create(const std::string& vocabPath,
+                   const std::string& trainPath,
                    size_t maxSize) {
-  if(!vImpl_)
-    vImpl_ = createDefaultVocab(); // Only DefaultVocab can be built from streams
-  vImpl_->create(trainStrm, vocabStrm, maxSize);
+  create(vocabPath, std::vector<std::string>({trainPath}), maxSize);
 }
 
 void Vocab::createFake() {
diff --git a/src/data/vocab.h b/src/data/vocab.h
index 1551f746..4bad1795 100755
--- a/src/data/vocab.h
+++ b/src/data/vocab.h
@@ -26,15 +26,18 @@ public:
   : options_(options), batchIndex_(batchIndex) {}
 
   int loadOrCreate(const std::string& vocabPath,
-                   const std::string& textPath,
-                   int max = 0);
+                   const std::vector<std::string>& trainPaths,
+                   size_t maxSize = 0);
 
-  int load(const std::string& vocabPath, int max = 0);
-  void create(const std::string& vocabPath, const std::string& trainPath);
+  int load(const std::string& vocabPath, size_t maxSize = 0);
 
-  void create(io::InputFileStream& trainStrm,
-              io::OutputFileStream& vocabStrm,
-              size_t maxSize = 0);
+  void create(const std::string& vocabPath,
+              const std::vector<std::string>& trainPaths,
+              size_t maxSize);
+
+  void create(const std::string& vocabPath,
+              const std::string& trainPath,
+              size_t maxSize);
 
   // string token to token id
   Word operator[](const std::string& word) const;
diff --git a/src/data/vocab_base.h b/src/data/vocab_base.h
index 23e1520c..d3078d9a 100644
--- a/src/data/vocab_base.h
+++ b/src/data/vocab_base.h
@@ -1,19 +1,19 @@
 #pragma once
 
+#include "data/types.h"
 #include "common/definitions.h"
+#include "common/utils.h"
 #include "common/file_stream.h"
-#include "data/types.h"
 
 namespace marian {
 
 class VocabBase {
 public:
   virtual int load(const std::string& vocabPath, int max = 0) = 0;
-  virtual void create(const std::string& vocabPath, const std::string& trainPath) = 0;
 
-  virtual void create(io::InputFileStream& trainStrm,
-                      io::OutputFileStream& vocabStrm,
-                      size_t maxSize = 0) = 0;
+  virtual void create(const std::string& vocabPath,
+                      const std::vector<std::string>& trainPaths,
+                      size_t maxSize) = 0;
 
   // return canonical suffix for given type of vocabulary
   virtual const std::string& canonicalExtension() const = 0;
diff --git a/src/examples/mnist/model_lenet.h b/src/examples/mnist/model_lenet.h
index ac0298e3..c2a39977 100644
--- a/src/examples/mnist/model_lenet.h
+++ b/src/examples/mnist/model_lenet.h
@@ -12,12 +12,12 @@ public:
   MnistLeNet(Ptr<Options> options, Args... args)
       : MnistFeedForwardNet(options, args...) {}
 
-  virtual void clear(Ptr<ExpressionGraph> graph) { graph->clear(); };
+  virtual void clear(Ptr<ExpressionGraph> graph) override { graph->clear(); };
 
 protected:
   virtual Expr construct(Ptr<ExpressionGraph> g,
                          Ptr<data::Batch> batch,
-                         bool inference = false) {
+                         bool inference = false) override {
     const std::vector<int> dims = {784, 128, 10};
 
     // Start with an empty expression graph
diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h
index 01546c1e..7da85443 100755
--- a/src/graph/node_operators_binary.h
+++ b/src/graph/node_operators_binary.h
@@ -490,20 +490,20 @@ struct RowsNodeOp : public NaryNodeOp {
 // This operation indexes a tensor along an axis.
 // This is similar to the common gather() operation in other toolkits.
 // For example, this can be used for:
-//  - Same index applied to all batch items (today's select()):
-//    'index' has 1 in the axes that match batch axes in the input, and axis set to the one axis that gets selected over.
-//    Example: Selecting Transformer head 0, i.e. return a[:,1,:,:]
-//      axis = -3
-//      a  : (B,  H , S, T)     B=batch dim, H=#heads, S=src length, T=trg length
-//      idx: (   #1#, 1, 1)     #1# denotes 'axis'. All values are zero.
-//      out: (B,  1 , S, T)     out[b, 0, s, t] == a[b, idx[/*0,*/ 0, s, t], s, t]
-//  - Same data with batched indices (today's rows()):
-//    'data' has 1 in the batch axes.
-//    Example: Embedding lookup as done today using rows():
-//      axis = -2
-//      e  : (     V , E)        V=vocab size, E=embedding dimension
-//      idx: (#(B*S)#, 1)        B=batch size, S=source length, idx values are in range 0..V-1
-//      out: ( (B*S) , E)        out[b, s, e] == e[/*0,*/ idx[b, s, 0], e]
+//  - Same index applied to all batch items (today's select()):
+//    'index' has 1 in the axes that match batch axes in the input, and axis set to the one axis that gets selected over.
+//    Example: Selecting Transformer head 0, i.e. return a[:,1,:,:]
+//      axis = -3
+//      a  : (B,  H , S, T)     B=batch dim, H=#heads, S=src length, T=trg length
+//      idx: (   #1#, 1, 1)     #1# denotes 'axis'. All values are zero.
+//      out: (B,  1 , S, T)     out[b, 0, s, t] == a[b, idx[/*0,*/ 0, s, t], s, t]
+//  - Same data with batched indices (today's rows()):
+//    'data' has 1 in the batch axes.
+//    Example: Embedding lookup as done today using rows():
+//      axis = -2
+//      e  : (     V , E)        V=vocab size, E=embedding dimension
+//      idx: (#(B*S)#, 1)        B=batch size, S=source length, idx values are in range 0..V-1
+//      out: ( (B*S) , E)        out[b, s, e] == e[/*0,*/ idx[b, s, 0], e]
 //  - Batched selection (x-ent scenario): Both 'index' and 'data' have matching batch axes.
 //    Example: Cross-entropy loss as -select(logSoftmax(logits), groundTruth, axis=-1):
 //      axis = -1
@@ -511,14 +511,14 @@ struct RowsNodeOp : public NaryNodeOp {
 //      idx: (B, T, #1#)        idx values are in range 0..V-1
 //      out: (B, T,  1 )        out[b,t,0] == lp[b, t, idx[b, t, 0]]
 // Example for 2D tensor with axis=0:
-//  | t[index[0, 0] 0]   t[index[0, 1] 1] |
-//  | t[index[1, 0] 0]   t[index[1, 1] 1] |
-// And for axis 1:
-//  | t[0 index[0, 0]]   t[0 index[0, 1]] |
-//  | t[1 index[1, 0]]   t[1 index[1, 1]] |
-// For a 3-D tensor the output is specified by:
-//  out[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0
-//  out[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1
+//  | t[index[0, 0] 0]   t[index[0, 1] 1] |
+//  | t[index[1, 0] 0]   t[index[1, 1] 1] |
+// And for axis 1:
+//  | t[0 index[0, 0]]   t[0 index[0, 1]] |
+//  | t[1 index[1, 0]]   t[1 index[1, 1]] |
+// For a 3-D tensor the output is specified by:
+//  out[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0
+//  out[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1
 //  out[i][j][k] = input[i][j][index[i][j][k]]  # if dim == 2
 // If 'a' and 'indices' do not have the same rank, then negative 'axis' is
 // interpreted relative to 'a', and 'indices' must have the resulting axis.
@@ -953,6 +953,7 @@ struct HighwayNodeOp : public NaryNodeOp {
 };
 
 #ifdef CUDNN
+
 class ConvolutionOp : public NaryNodeOp {
 public:
   ConvolutionOp(const std::vector<Expr>& nodes,
@@ -970,12 +971,12 @@ public:
     conv_.getOutputShape(nodes[0]->shape(), shape_);
   }
 
-  NodeOps forwardOps() {
+  NodeOps forwardOps() override {
     return {NodeOp(conv_.forward(
         child(0)->val(), child(1)->val(), child(2)->val(), val_))};
   }
 
-  NodeOps backwardOps() {
+  NodeOps backwardOps() override {
     return {NodeOp(conv_.backward(child(0)->val(),
                                   child(0)->grad(),
                                   child(1)->val(),
@@ -984,7 +985,7 @@ public:
                                   adj_))};
   }
 
-  const std::string type() { return "layer_convolution"; }
+  const std::string type() override { return "layer_convolution"; }
 
 protected:
   ConvolutionWrapper conv_;
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index 9a752786..b8b19208 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -881,16 +881,16 @@ public:
                  strideWidth,
                  mode) {}
 
-  NodeOps forwardOps() {
+  NodeOps forwardOps() override {
     return {NodeOp(pooling_.forward(child(0)->val(), val_))};
   }
 
-  NodeOps backwardOps() {
+  NodeOps backwardOps() override {
     return {NodeOp(
         pooling_.backward(child(0)->val(), child(0)->grad(), val_, adj_))};
   }
 
-  const std::string type() { return "layer_pooling"; }
+  const std::string type() override { return "layer_pooling"; }
 
 protected:
   PoolingWrapper pooling_;
diff --git a/src/layers/loss.cpp b/src/layers/loss.cpp
index 87a2a1fd..03b79682 100755
--- a/src/layers/loss.cpp
+++ b/src/layers/loss.cpp
@@ -15,6 +15,8 @@ Ptr<LossBase> LossFactory(Ptr<Options> options, bool inference) {
     return New<PerplexityLoss>(smoothing);
   } else if(costType == "ce-rescore") {
     return New<CrossEntropyRescoreLoss>(smoothing);
+  } else if(costType == "ce-rescore-mean") {
+    return New<CrossEntropyRescoreMeanLoss>(smoothing);
   } else {  // same as ce-mean
     return New<CrossEntropyMeanLoss>(smoothing);
   }
@@ -108,4 +110,14 @@ Expr CrossEntropyRescoreLoss::getCost(Expr logits,
   auto ce = getCrossEntropy(logits, indices, mask, weights);
   return -sum(ce, /*axis =*/ -3);
 }
+
+Expr CrossEntropyRescoreMeanLoss::getCost(Expr logits,
+                                          Expr indices,
+                                          Expr mask,
+                                          Expr weights) {
+  auto ce = getCrossEntropy(logits, indices, mask, weights);
+  // divide by number of words in sentence
+  return -sum(ce, /*axis =*/ -3) / sum(mask, /*axis =*/ -3);
+}
+
 }  // namespace marian
diff --git a/src/layers/loss.h b/src/layers/loss.h
index 89d20819..ebf71147 100644
--- a/src/layers/loss.h
+++ b/src/layers/loss.h
@@ -66,5 +66,11 @@ public:
   Expr getCost(Expr logits, Expr indices, Expr mask, Expr weights) override;
 };
 
+class CrossEntropyRescoreMeanLoss : public LossBase {
+public:
+  explicit CrossEntropyRescoreMeanLoss(float smoothing = 0) : LossBase(smoothing){};
+  Expr getCost(Expr logits, Expr indices, Expr mask, Expr weights) override;
+};
+
 Ptr<LossBase> LossFactory(Ptr<Options> options, bool inference);
 }  // namespace marian
diff --git a/src/layers/word2vec_reader.h b/src/layers/word2vec_reader.h
index f18fd439..a7e85592 100755
--- a/src/layers/word2vec_reader.h
+++ b/src/layers/word2vec_reader.h
@@ -18,8 +18,6 @@ public:
     LOG(info, "[data] Loading embedding vectors from {}", fileName);
 
     io::InputFileStream embFile(fileName);
-    ABORT_IF(!embFile.isOpen(),
-             "Unable to open file with embeddings: " + fileName);
 
     std::string line;
     std::vector<std::string> values;
@@ -75,19 +73,19 @@ private:
     values.reserve(dimEmb);
     // Glorot numal distribution
     float scale = sqrtf(2.0f / (dimVoc + dimEmb));
-    
+
     // @TODO: switch to new random generator back-end.
-    // This is rarly used however. 
+    // This is rarely used however.
     std::random_device rd;
     std::mt19937 engine(rd());
- 
+
     std::normal_distribution<float> d(0, scale);
     auto gen = [&d, &engine] () {
        return d(engine);
     };
 
     std::generate(values.begin(), values.end(), gen);
- 
+
     return values;
   }
 };
diff --git a/src/models/char_s2s.h b/src/models/char_s2s.h
index c4dce6f5..6d5d1db1 100644
--- a/src/models/char_s2s.h
+++ b/src/models/char_s2s.h
@@ -12,7 +12,7 @@ public:
   CharS2SEncoder(Ptr<Options> options) : EncoderS2S(options) {}
 
   virtual Ptr<EncoderState> build(Ptr<ExpressionGraph> graph,
-                                  Ptr<data::CorpusBatch> batch) {
+                                  Ptr<data::CorpusBatch> batch) override {
     auto embeddings = buildSourceEmbeddings(graph);
 
     // select embeddings that occur in the batch
diff --git a/src/models/hardatt.h b/src/models/hardatt.h
deleted file mode 100755
index 77ba7f44..00000000
--- a/src/models/hardatt.h
+++ /dev/null
@@ -1,303 +0,0 @@
-#pragma once
-
-#include "marian.h"
-
-#include "layers/generic.h"
-#include "rnn/attention_constructors.h"
-#include "rnn/types.h"
-
-#include <numeric>
-
-namespace marian {
-
-class DecoderStateHardAtt : public DecoderState {
-protected:
-  std::vector<IndexType> attentionIndices_;
-
-public:
-  DecoderStateHardAtt(const rnn::States& states,
-                      Expr logProbs,
-                      const std::vector<Ptr<EncoderState>>& encStates,
-                      Ptr<data::CorpusBatch> batch)
-      : DecoderState(states, logProbs, encStates, batch) {}
-
-  virtual Ptr<DecoderState> select(const std::vector<IndexType>& selIdx,
-                                   int beamSize) const override {
-    std::vector<IndexType> selectedAttentionIndices;
-    for(auto i : selIdx)
-      selectedAttentionIndices.push_back(attentionIndices_[i]);
-
-    auto selectedState = New<DecoderStateHardAtt>(states_.select(selIdx, beamSize, /*isBatchMajor=*/false),
-                                    logProbs_,
-                                    encStates_,
-                                    batch_);
-    selectedState->attentionIndices_ = selectedAttentionIndices;
-
-    // Set positon of new state based on the target token position of current
-    // state
-    // @TODO: I copied this to make this consistent with the other instances. Needed?
-    selectedState->setPosition(getPosition());
-    return selectedState;
-  }
-
-  // @TODO: why are these virtual?
-  virtual void setAttentionIndices(
-      const std::vector<IndexType>& attentionIndices) {
-    attentionIndices_ = attentionIndices;
-  }
-
-  virtual std::vector<IndexType>& getAttentionIndices() {
-    ABORT_IF(attentionIndices_.empty(), "Empty attention indices");
-    return attentionIndices_;
-  }
-
-  virtual void blacklist(Expr totalCosts, Ptr<data::CorpusBatch> batch) override {
-    auto attentionIdx = getAttentionIndices();
-    int dimVoc = totalCosts->shape()[-1];
-    for(size_t i = 0; i < attentionIdx.size(); i++) {
-      if(batch->front()->data()[attentionIdx[i]] != 0) {
-        totalCosts->val()->set(
-            i * dimVoc + DEFAULT_EOS_ID,  // this is checked at vocab-load time
-                                          // if the special tokens are present
-            std::numeric_limits<float>::lowest());
-      } else {
-        totalCosts->val()->set(i * dimVoc + STP_ID,
-                               std::numeric_limits<float>::lowest());
-      }
-    }
-  }
-};
-
-class DecoderHardAtt : public DecoderBase {
-protected:
-  Ptr<rnn::RNN> rnn_;
-  std::unordered_set<Word> specialSymbols_;
-
-public:
-  DecoderHardAtt(Ptr<Options> options) : DecoderBase(options) {
-    if(options->has("special-vocab")) {
-      auto spec = options->get<std::vector<Word>>("special-vocab");
-      specialSymbols_.insert(spec.begin(), spec.end());
-    }
-  }
-
-  virtual Ptr<DecoderState> startState(
-      Ptr<ExpressionGraph> graph,
-      Ptr<data::CorpusBatch> batch,
-      std::vector<Ptr<EncoderState>>& encStates) override {
-
-    std::vector<Expr> meanContexts;
-    for(auto& encState : encStates) {
-      // average the source context weighted by the batch mask
-      // this will remove padded zeros from the average
-      meanContexts.push_back(weighted_average(
-          encState->getContext(), encState->getMask(), /*axis =*/ -3));
-    }
-
-    Expr start;
-    if(!meanContexts.empty()) {
-      // apply single layer network to mean to map into decoder space
-      auto mlp = mlp::mlp(graph)                                     //
-                     .push_back(mlp::dense(graph)                    //
-                                ("prefix", prefix_ + "_ff_state")    //
-                                ("dim", opt<int>("dim-rnn"))         //
-                                ("activation", (int)mlp::act::tanh)  //
-                                ("layer-normalization",
-                                 opt<bool>("layer-normalization")));
-      start = mlp->apply(meanContexts);
-    }
-
-    rnn::States startStates(opt<size_t>("dec-depth"), {start, start});
-    auto startState = New<DecoderStateHardAtt>(startStates, nullptr, encStates, batch);
-    startState->setAttentionIndices(std::vector<IndexType>({ 0 }));
-    return startState;
-  }
-
-  virtual Ptr<DecoderState> step(Ptr<ExpressionGraph> graph,
-                                 Ptr<DecoderState> state) override {
-
-    auto type = options_->get<std::string>("type");
-
-    int dimTrgVoc = options_->get<std::vector<int>>("dim-vocabs").back();
-
-    int dimTrgEmb = options_->get<int>("dim-emb");
-
-    int dimDecState = options_->get<int>("dim-rnn");
-    bool layerNorm = options_->get<bool>("layer-normalization");
-    bool skipDepth = options_->get<bool>("skip");
-
-    size_t decoderLayers = options_->get<size_t>("dec-depth");
-    auto cellType = options_->get<std::string>("dec-cell");
-
-    float dropoutRnn = inference_ ? 0 : options_->get<float>("dropout-rnn");
-    float dropoutTrg = inference_ ? 0 : options_->get<float>("dropout-trg");
-
-    auto stateHardAtt = std::dynamic_pointer_cast<DecoderStateHardAtt>(state);
-
-    auto trgEmbeddings = stateHardAtt->getTargetEmbeddings();
-
-    auto context = stateHardAtt->getEncoderStates()[0]->getContext();
-    int dimContext = context->shape()[-1];
-    int dimSrcWords = context->shape()[-3];
-
-    int dimBatch = context->shape()[-2];
-    int dimTrgWords = trgEmbeddings->shape()[-3];
-    int dimBeam = trgEmbeddings->shape()[-4];
-
-    if(dropoutTrg) {
-      trgEmbeddings
-          = dropout(trgEmbeddings, dropoutTrg, {dimTrgWords, dimBatch, 1});
-    }
-
-    auto flatContext = reshape(context, {dimBatch * dimSrcWords, dimContext});
-    auto attendedContext
-        = rows(flatContext, stateHardAtt->getAttentionIndices());
-    attendedContext = reshape(attendedContext,
-                              {dimBeam, dimTrgWords, dimBatch, dimContext});
-
-    auto rnnInputs = concatenate({trgEmbeddings, attendedContext}, /*axis =*/ -1);
-    int dimInput = rnnInputs->shape()[-1];
-
-    if(!rnn_) {
-      auto rnn = rnn::rnn(graph)              //
-          ("type", cellType)                  //
-          ("dimInput", dimInput)              //
-          ("dimState", dimDecState)           //
-          ("dropout", dropoutRnn)             //
-          ("layer-normalization", layerNorm)  //
-          ("skip", skipDepth);
-
-      if(type == "hard-soft-att") {
-        auto attCell = rnn::stacked_cell(graph)         //
-                           .push_back(rnn::cell(graph)  //
-                                      ("prefix", prefix_ + "_cell1"));
-        for(size_t i = 0; i < state->getEncoderStates().size(); ++i) {
-          std::string prefix = prefix_;
-          if(state->getEncoderStates().size() > 1)
-            prefix += "_att" + std::to_string(i + 1);
-
-          attCell.push_back(rnn::attention(graph)  //
-                            ("prefix", prefix)     //
-                                .set_state(state->getEncoderStates()[i]));
-        }
-
-        attCell.push_back(rnn::cell(graph)                //
-                          ("prefix", prefix_ + "_cell2")  //
-                          ("final", true));
-        rnn.push_back(attCell);
-      } else {
-        rnn.push_back(rnn::cell(graph)("prefix", prefix_));
-      }
-
-      for(size_t i = 0; i < decoderLayers - 1; ++i)
-        rnn.push_back(rnn::cell(graph)  //
-                      ("prefix", prefix_ + "_l" + std::to_string(i)));
-
-      rnn_ = rnn.construct();
-    }
-
-    auto decContext = rnn_->transduce(rnnInputs, stateHardAtt->getStates());
-    rnn::States decStates = rnn_->lastCellStates();
-
-    //// 2-layer feedforward network for outputs and cost
-    auto out = mlp::mlp(graph)
-                   .push_back(mlp::dense(graph)                     //
-                              ("prefix", prefix_ + "_ff_logit_l1")  //
-                              ("dim", dimTrgEmb)                    //
-                              ("activation", (int)mlp::act::tanh)   //
-                              ("layer-normalization", layerNorm))   //
-                   .push_back(mlp::dense(graph)                     //
-                              ("prefix", prefix_ + "_ff_logit_l2")  //
-                              ("dim", dimTrgVoc));
-
-    Expr logits;
-    if(type == "hard-soft-att") {
-      std::vector<Expr> alignedContexts;
-      for(int k = 0; k < state->getEncoderStates().size(); ++k) {
-        // retrieve all the aligned contexts computed by the attention mechanism
-        auto att = rnn_->at(0)
-                       ->as<rnn::StackedCell>()
-                       ->at(k + 1)
-                       ->as<rnn::Attention>();
-        alignedContexts.push_back(att->getContext());
-      }
-
-      Expr alignedContext;
-      if(alignedContexts.size() > 1)
-        alignedContext = concatenate(alignedContexts, /*axis =*/ -1);
-      else if(alignedContexts.size() == 1)
-        alignedContext = alignedContexts[0];
-
-      logits = out->apply(rnnInputs, decContext, alignedContext);
-    } else {
-      logits = out->apply(rnnInputs, decContext);
-    }
-
-    auto nextState = New<DecoderStateHardAtt>(decStates,
-                                             logits,
-                                             stateHardAtt->getEncoderStates(),
-                                             stateHardAtt->getBatch());
-    nextState->setAttentionIndices(std::vector<IndexType>(stateHardAtt->getAttentionIndices()));
-    nextState->setPosition(state->getPosition() + 1); // @TODO: I added this for consistency. Correct?
-    return nextState;
-  }
-
-  const std::vector<Expr> getAlignments() {
-    auto att = rnn_->at(0)->as<rnn::StackedCell>()->at(1)->as<rnn::Attention>();
-    return att->getAlignments();
-  }
-
-  void embeddingsFromBatch(Ptr<ExpressionGraph> graph,
-                           Ptr<DecoderState> state,
-                           Ptr<data::CorpusBatch> batch) override {
-    DecoderBase::embeddingsFromBatch(graph, state, batch);
-
-    auto subBatch = (*batch)[batchIndex_];
-    int dimBatch = (int)subBatch->batchSize();
-    int dimWords = (int)subBatch->batchWidth();
-
-    std::vector<IndexType> attentionIndices(dimBatch, 0);
-    std::vector<IndexType> currentPos(dimBatch, 0);
-    std::iota(currentPos.begin(), currentPos.end(), 0);
-
-    for(int i = 0; i < dimWords - 1; ++i) {
-      for(int j = 0; j < dimBatch; ++j) {
-        Word word = subBatch->data()[i * dimBatch + j];
-        if(specialSymbols_.count(word))
-          currentPos[j] += dimBatch;
-        attentionIndices.push_back(currentPos[j]);
-      }
-    }
-
-    std::dynamic_pointer_cast<DecoderStateHardAtt>(state)->setAttentionIndices(
-        attentionIndices);
-  }
-
-  virtual void embeddingsFromPrediction(Ptr<ExpressionGraph> graph,
-                                        Ptr<DecoderState> state,
-                                        const std::vector<IndexType>& embIdx,
-                                        int dimBatch,
-                                        int beamSize) override {
-    DecoderBase::embeddingsFromPrediction(
-        graph, state, embIdx, dimBatch, beamSize);
-
-    auto stateHardAtt = std::dynamic_pointer_cast<DecoderStateHardAtt>(state);
-
-    size_t dimSrcWords
-        = state->getEncoderStates()[0]->getContext()->shape()[-3];
-
-    if(embIdx.empty()) {
-      stateHardAtt->setAttentionIndices({0});
-    } else {
-      for(size_t i = 0; i < embIdx.size(); ++i)
-        if(specialSymbols_.count(embIdx[i])) {
-          stateHardAtt->getAttentionIndices()[i]++;
-          if(stateHardAtt->getAttentionIndices()[i] >= dimSrcWords)
-            stateHardAtt->getAttentionIndices()[i] = (IndexType)dimSrcWords - 1;
-        }
-    }
-  }
-
-  void clear() override { rnn_ = nullptr; }
-};
-}  // namespace marian
diff --git a/src/models/model_factory.cpp b/src/models/model_factory.cpp
index 65629176..d42f07c8 100644
--- a/src/models/model_factory.cpp
+++ b/src/models/model_factory.cpp
@@ -6,7 +6,6 @@
 #include "models/costs.h"
 
 #include "models/amun.h"
-#include "models/hardatt.h"
 #include "models/nematus.h"
 #include "models/s2s.h"
 #include "models/transformer_factory.h"
@@ -47,11 +46,6 @@ Ptr<DecoderBase> DecoderFactory::construct() {
   if(options_->get<std::string>("type") == "transformer")
     // return New<DecoderTransformer>(options_);
     return NewDecoderTransformer(options_);
-  if(options_->get<std::string>("type") == "hard-att")
-    return New<DecoderHardAtt>(options_);
-  if(options_->get<std::string>("type") == "hard-soft-att")
-    return New<DecoderHardAtt>(options_);
-
   ABORT("Unknown decoder type");
 }
 
@@ -120,24 +114,6 @@ Ptr<ModelBase> by_type(std::string type, usage use, Ptr<Options> options) {
             .construct();
   }
 
-  if(type == "hard-att") {
-    return models::encoder_decoder()(options)
-        ("usage", use)
-        ("original-type", type)
-            .push_back(models::encoder()("type", "s2s"))
-            .push_back(models::decoder()("type", "hard-att"))
-            .construct();
-  }
-
-  if(type == "hard-soft-att") {
-    return models::encoder_decoder()(options)
-        ("usage", use)
-        ("original-type", type)
-            .push_back(models::encoder()("type", "s2s"))
-            .push_back(models::decoder()("type", "hard-soft-att"))
-            .construct();
-  }
-
   if(type == "multi-s2s") {
     size_t numEncoders = 2;
     auto ms2sFactory = models::encoder_decoder()(options)
@@ -172,25 +148,6 @@ Ptr<ModelBase> by_type(std::string type, usage use, Ptr<Options> options) {
     return ms2sFactory.construct();
   }
 
-  if(type == "multi-hard-att") {
-    size_t numEncoders = 2;
-    auto ms2sFactory = models::encoder_decoder()(options)
-        ("usage", use)
-        ("type", "s2s")
-        ("original-type", type);
-
-    for(size_t i = 0; i < numEncoders; ++i) {
-      auto prefix = "encoder" + std::to_string(i + 1);
-      ms2sFactory.push_back(models::encoder()("prefix", prefix)("index", i));
-    }
-
-    ms2sFactory.push_back(models::decoder()
-                          ("index", numEncoders)
-                          ("type", "hard-soft-att"));
-
-    return ms2sFactory.construct();
-  }
-
   if(type == "multi-transformer") {
     size_t numEncoders = 2;
     auto mtransFactory = models::encoder_decoder()(options)
diff --git a/src/rescorer/rescorer.h b/src/rescorer/rescorer.h
index bf57a2b6..fa456856 100644
--- a/src/rescorer/rescorer.h
+++ b/src/rescorer/rescorer.h
@@ -52,8 +52,12 @@ public:
     ABORT_IF(options_->has("summary") && options_->has("alignment"),
              "Alignments can not be produced with summarized score");
 
+    ABORT_IF(options_->has("summary") && options_->get<bool>("normalize"),
+             "Normalization by length cannot be used with summary scores");
+
     options_->set("inference", true);
-    options_->set("cost-type", "ce-rescore");
+    // @TODO: make normalize here a float and pass into loss to compute the same way as in decoding
+    options_->set("cost-type", options_->get<bool>("normalize") ? "ce-rescore-mean" : "ce-rescore");
 
     if(options_->get<bool>("n-best"))
       corpus_ = New<CorpusNBest>(options_);
@@ -97,6 +101,8 @@ public:
 
     std::string alignment = options_->get<std::string>("alignment", "");
     bool summarize = options_->has("summary");
+    bool normalize = options_->get<bool>("normalize");
+
     std::string summary = summarize ? options_->get<std::string>("summary") : "cross-entropy";
 
     float sumCost = 0;
@@ -118,7 +124,11 @@ public:
             builder = models_[id % graphs_.size()];
           }
 
+          // @TODO: normalize by length as in normalize
+          // Once we have Frank's concept of ce-sum with sample size by words we will return a pair
+          // here which will make it trivial to report all variants. 
           auto costNode = builder->build(graph, batch);
+
           graph->forward();
 
           std::vector<float> scores;
@@ -141,13 +151,29 @@ public:
               output->Write((long)batch->getSentenceIds()[i], scores[i], aligns[i]);
             }
           }
+
+          // progress heartbeat for MS-internal Philly compute cluster
+          // otherwise this job may be killed prematurely if no log for 4 hrs
+          if (getenv("PHILLY_JOB_ID")   // this environment variable exists when running on the cluster
+              && id % 1000 == 0)  // heartbeat once every 1000 batches
+          {
+            auto progress = id / 10000.f; //fake progress for now, becomes >100 after 1M batches
+            fprintf(stdout, "PROGRESS: %.2f%%\n", progress);
+            fflush(stdout);
+          }
         };
 
-        pool.enqueue(task, batchId % graphs_.size());
-        batchId++;
+        pool.enqueue(task, batchId++);
       }
     }
 
+    if(normalize) {
+      LOG(info, "Total normalized log probs {} : Total sentences {} : Total words {}", sumCost, sumSamples, sumWords);
+      LOG(warn, "Sum of normalized log probs is a sum of averages");
+    } else {
+      LOG(info, "Total log probs {} : Total sentences {} : Total words {}", sumCost, sumSamples, sumWords);
+    }
+
     if(summarize) {
       float cost = 0;
       if(summary == "perplexity")
diff --git a/src/rescorer/score_collector.cpp b/src/rescorer/score_collector.cpp
index 65f43c70..ac118a6a 100644
--- a/src/rescorer/score_collector.cpp
+++ b/src/rescorer/score_collector.cpp
@@ -9,9 +9,14 @@ namespace marian {
 
 ScoreCollector::ScoreCollector(const Ptr<Options>& options)
     : nextId_(0),
-      outStrm_(new io::OutputFileStream(std::cout)),
       alignment_(options->get<std::string>("alignment", "")),
-      alignmentThreshold_(getAlignmentThreshold(alignment_)) {}
+      alignmentThreshold_(getAlignmentThreshold(alignment_)) {
+
+    if(options->get<std::string>("output") == "stdout")
+      outStrm_.reset(new io::OutputFileStream(std::cout));
+    else
+      outStrm_.reset(new io::OutputFileStream(options->get<std::string>("output")));
+  }
 
 void ScoreCollector::Write(long id, const std::string& message) {
   std::lock_guard<std::mutex> lock(mutex_);
diff --git a/src/tensors/cpu/prod.cpp b/src/tensors/cpu/prod.cpp
index 0baeeb96..69923f87 100755
--- a/src/tensors/cpu/prod.cpp
+++ b/src/tensors/cpu/prod.cpp
@@ -99,7 +99,7 @@ void Prod(marian::Tensor C,
 }
 
 void ProdBatched(marian::Tensor C,
-                 Ptr<Allocator> allocator,
+                 Ptr<Allocator> /*allocator*/,
                  const marian::Tensor A,
                  const marian::Tensor B,
                  bool transA,
@@ -150,7 +150,7 @@ void ProdBatched(marian::Tensor C,
           (int)ldc);
   }
 #else
-  C; allocator; A; B; transA; transB; beta; scalar;
+  C; A; B; transA; transB; beta; scalar;
   ABORT("You need to compile with MKL in order to use the CPU version");
 #endif
 }
diff --git a/src/training/scheduler.h b/src/training/scheduler.h
index 8fc31d70..dee62496 100755
--- a/src/training/scheduler.h
+++ b/src/training/scheduler.h
@@ -246,8 +246,7 @@ public:
       } else {
         if(options_->get<bool>("lr-report")) {
           LOG(info,
-              "Ep. {} : Up. {} : Sen. {} : Cost {:.2f} : Time {:2f}s : {:.2f} words/s : L.r. "
-              "{:.4e}",
+              "Ep. {} : Up. {} : Sen. {} : Cost {:.8f} : Time {:.2f}s : {:.2f} words/s : L.r. {:.4e}",
               state_->epochs,
               state_->batches,
               utils::withCommas(state_->samplesEpoch),
@@ -257,7 +256,7 @@ public:
               state_->eta);
         } else {
           LOG(info,
-              "Ep. {} : Up. {} : Sen. {} : Cost {:.2f} : Time {:.2f}s : {:.2f} words/s",
+              "Ep. {} : Up. {} : Sen. {} : Cost {:.8f} : Time {:.2f}s : {:.2f} words/s",
               state_->epochs,
               state_->batches,
               utils::withCommas(state_->samplesEpoch),
diff --git a/src/translator/output_collector.cpp b/src/translator/output_collector.cpp
index c7114a56..58fba69b 100755
--- a/src/translator/output_collector.cpp
+++ b/src/translator/output_collector.cpp
@@ -6,6 +6,10 @@
 
 namespace marian {
 
+OutputCollector::OutputCollector()
+  : nextId_(0),
+    printing_(new DefaultPrinting()) {}
+
 OutputCollector::OutputCollector(std::string outFile)
   : nextId_(0),
     outStrm_(new io::OutputFileStream(std::cout)),
diff --git a/src/translator/output_collector.h b/src/translator/output_collector.h
index 154e8ded..51b47159 100755
--- a/src/translator/output_collector.h
+++ b/src/translator/output_collector.h
@@ -45,10 +45,13 @@ private:
 
 class OutputCollector {
 public:
-  OutputCollector(std::string outFile = "stdout");
+  OutputCollector();
+  OutputCollector(std::string outFile);
 
   template <class T>
-  OutputCollector(T&& arg) : nextId_(0), outStrm_(new io::OutputFileStream(arg)) {}
+  OutputCollector(T&& arg) 
+    : nextId_(0), 
+      outStrm_(new io::OutputFileStream(arg)) {}
 
   OutputCollector(const OutputCollector&) = delete;
 
diff --git a/src/translator/translator.h b/src/translator/translator.h
index cc2cbea2..9f973113 100755
--- a/src/translator/translator.h
+++ b/src/translator/translator.h
@@ -111,19 +111,21 @@ public:
                            bestn.str(),
                            options_->get<bool>("n-best"));
         }
+
+
+        // progress heartbeat for MS-internal Philly compute cluster
+        // otherwise this job may be killed prematurely if no log for 4 hrs
+        if (getenv("PHILLY_JOB_ID")   // this environment variable exists when running on the cluster
+            && id % 1000 == 0)  // heartbeat once every 1000 batches
+        {
+          auto progress = 0.f; //fake progress for now
+          fprintf(stdout, "PROGRESS: %.2f%%\n", progress);
+          fflush(stdout);
+        }
       };
 
       threadPool.enqueue(task, batchId++);
 
-      // progress heartbeat for MS-internal Philly compute cluster
-      //otherwise this job may be killed prematurely if no log for 4 hrs
-      if (getenv("PHILLY_JOB_ID"))  // this environment variable exists when running on the cluster
-      {
-        auto progress = 0.f; //fake progress for now
-        fprintf(stdout, "PROGRESS: %.2f%%\n", progress);
-        fflush(stdout);
-      }
-
     }
   }
 };
author	Marcin Junczys-Dowmunt <marcinjd@microsoft.com>	2018-12-07 00:21:25 +0300
committer	Marcin Junczys-Dowmunt <marcinjd@microsoft.com>	2018-12-07 00:21:25 +0300
commit	1b2968c8b9465ce2225f304f5deea7f642f3e533 (patch)
tree	3dc8fd194c28a635ac57dea9951ba74f9c21cef6
parent	9562338ff78e226caad84ac29aa0be4e8b344368 (diff)
parent	e78d805955a5613e91cc3f2af1db2776a6c6e3da (diff)