diff options
author | Nick Nussbaum <nick.nussbaum@fticonsulting.com> | 2013-01-19 03:43:28 +0400 |
---|---|---|
committer | Nick Nussbaum <nick.nussbaum@fticonsulting.com> | 2013-01-19 03:43:28 +0400 |
commit | 754a3822b5f9a640ef440b8b3f43c6d344bf85cd (patch) | |
tree | 67023bdd203dfe6e0b5d78a897266513e24b0a87 /vowpalwabbit/vwdll.cpp | |
parent | bc091986b9464a85da487045565c9b78581e205b (diff) |
add utf-16 to utf-8 conversion functions to vwdll api for use from c# .net
the existing functions were modified and a new FcnA form added that
retains the current ansi/utf8 access
Diffstat (limited to 'vowpalwabbit/vwdll.cpp')
-rw-r--r-- | vowpalwabbit/vwdll.cpp | 56 |
1 files changed, 52 insertions, 4 deletions
diff --git a/vowpalwabbit/vwdll.cpp b/vowpalwabbit/vwdll.cpp index 1616ffcf..7fb6eed1 100644 --- a/vowpalwabbit/vwdll.cpp +++ b/vowpalwabbit/vwdll.cpp @@ -1,12 +1,34 @@ #include <memory> +#include <codecvt> +#include <locale> +#include <string> #include "vwdll.h" #include "parser.h" +// This interface now provides "wide" functions for compatibility with .NET interop +// The default functions assume a wide (16 bit char pointer) string that is converted to a utf8-string and passed to +// a function which takes a narrow (8 bit char pointer) argument. Both are exposed in the c/c++ API +// so that programs using 8 bit characters can use the direct call without conversion and +// programs using 16 bit characters can use the default wide versions of the functions. +// "Ansi" versions (FcnA instead of Fcn) have only been written for functions which handle strings. + +// a future optimization would be to write an inner version of hash feature which either hashes the +// wide string directly (and lives with the different hash values) or incorporates the UTF-16 to UTF-8 conversion +// in the hashing to avoid allocating an intermediate string. 
+ extern "C" { + + VW_DLL_MEMBER VW_HANDLE VW_CALLING_CONV VW_Initialize(const char16_t * pstrArgs) + { + std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert; + std::string sa(convert.to_bytes(pstrArgs)); + return VW_InitializeA(sa.c_str()); + } + - VW_DLL_MEMBER VW_HANDLE VW_CALLING_CONV VW_Initialize(const char * pstrArgs) + VW_DLL_MEMBER VW_HANDLE VW_CALLING_CONV VW_InitializeA(const char * pstrArgs) { std::auto_ptr<vw> inst(new vw); try @@ -59,7 +81,17 @@ extern "C" } } - VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ReadExample(VW_HANDLE handle, const char * line) + + VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ReadExample(VW_HANDLE handle, const char16_t * line) + { + std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert; + std::string sa(convert.to_bytes(line)); + return VW_ReadExampleA(handle, sa.c_str()); + } + + + + VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ReadExampleA(VW_HANDLE handle, const char * line) { try { @@ -87,7 +119,14 @@ extern "C" } } - VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashSpace(VW_HANDLE handle, const char * s) + VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashSpace(VW_HANDLE handle, const char16_t * s) + { + std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert; + std::string sa(convert.to_bytes(s)); + return VW_HashSpaceA(handle,sa.c_str()); + } + + VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashSpaceA(VW_HANDLE handle, const char * s) { try { @@ -102,7 +141,16 @@ extern "C" } } - VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashFeature(VW_HANDLE handle, const char * s, unsigned long u) + + VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashFeature(VW_HANDLE handle, const char16_t * s, unsigned long u) + { + std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert; + std::string sa(convert.to_bytes(s)); + return VW_HashFeatureA(handle,sa.c_str(),u); + } + + + VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashFeatureA(VW_HANDLE handle, const char * s, unsigned long u) { try { |