Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Nussbaum <nick.nussbaum@fticonsulting.com>2013-01-19 03:43:28 +0400
committerNick Nussbaum <nick.nussbaum@fticonsulting.com>2013-01-19 03:43:28 +0400
commit754a3822b5f9a640ef440b8b3f43c6d344bf85cd (patch)
tree67023bdd203dfe6e0b5d78a897266513e24b0a87 /vowpalwabbit/vwdll.cpp
parentbc091986b9464a85da487045565c9b78581e205b (diff)
add utf-16 to utf-8 conversion functions to vwdll api for use from c# .net
the existing functions were modified and a new FcnA form added that retains the current ansi/utf8 access
Diffstat (limited to 'vowpalwabbit/vwdll.cpp')
-rw-r--r--vowpalwabbit/vwdll.cpp56
1 files changed, 52 insertions, 4 deletions
diff --git a/vowpalwabbit/vwdll.cpp b/vowpalwabbit/vwdll.cpp
index 1616ffcf..7fb6eed1 100644
--- a/vowpalwabbit/vwdll.cpp
+++ b/vowpalwabbit/vwdll.cpp
@@ -1,12 +1,34 @@
#include <memory>
+#include <codecvt>
+#include <locale>
+#include <string>
#include "vwdll.h"
#include "parser.h"
+// This interface now provides "wide" functions for compatibility with .NET interop
+// The default functions take a wide (16-bit char) pointer, convert it to a UTF-8 string, and pass it to
+// the corresponding function that takes a narrow (8-bit char) pointer. Both are exposed in the C/C++ API
+// so that programs using 8-bit characters can use the direct call without conversion and
+// programs using 16-bit characters can use the default wide versions of the functions.
+// "ANSI" versions (FcnA instead of Fcn) have only been written for functions which handle strings.
+
+// A future optimization would be to write an inner version of hash feature which either hashes the
+// wide string directly (living with the different hash values) or incorporates the UTF-16 to UTF-8 conversion
+// into the hashing to avoid allocating an intermediate string.
+
extern "C"
{
+
+ // Wide-character entry point: converts the UTF-16 argument string to UTF-8
+ // and forwards it to VW_InitializeA.
+ // pstrArgs: null-terminated UTF-16 string.
+ // Returns whatever VW_InitializeA returns for the converted string.
+ VW_DLL_MEMBER VW_HANDLE VW_CALLING_CONV VW_Initialize(const char16_t * pstrArgs)
+ {
+ // codecvt_utf8_utf16 decodes surrogate pairs; codecvt_utf8<char16_t> is UCS-2 only
+ // and mishandles characters outside the Basic Multilingual Plane.
+ std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
+ std::string sa(convert.to_bytes(pstrArgs));
+ return VW_InitializeA(sa.c_str());
+ }
+
- VW_DLL_MEMBER VW_HANDLE VW_CALLING_CONV VW_Initialize(const char * pstrArgs)
+ VW_DLL_MEMBER VW_HANDLE VW_CALLING_CONV VW_InitializeA(const char * pstrArgs)
{
std::auto_ptr<vw> inst(new vw);
try
@@ -59,7 +81,17 @@ extern "C"
}
}
- VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ReadExample(VW_HANDLE handle, const char * line)
+
+ // Wide-character entry point: converts the UTF-16 example line to UTF-8
+ // and forwards it to VW_ReadExampleA.
+ // handle: passed through unchanged to VW_ReadExampleA.
+ // line: null-terminated UTF-16 string.
+ // Returns whatever VW_ReadExampleA returns for the converted line.
+ VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ReadExample(VW_HANDLE handle, const char16_t * line)
+ {
+ // codecvt_utf8_utf16 decodes surrogate pairs; codecvt_utf8<char16_t> is UCS-2 only
+ // and mishandles characters outside the Basic Multilingual Plane.
+ std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
+ std::string sa(convert.to_bytes(line));
+ return VW_ReadExampleA(handle, sa.c_str());
+ }
+
+
+
+ VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ReadExampleA(VW_HANDLE handle, const char * line)
{
try
{
@@ -87,7 +119,14 @@ extern "C"
}
}
- VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashSpace(VW_HANDLE handle, const char * s)
+ // Wide-character entry point: converts the UTF-16 string to UTF-8
+ // and forwards it to VW_HashSpaceA.
+ // handle: passed through unchanged to VW_HashSpaceA.
+ // s: null-terminated UTF-16 string.
+ // Returns the value VW_HashSpaceA computes for the UTF-8 form of s.
+ VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashSpace(VW_HANDLE handle, const char16_t * s)
+ {
+ // codecvt_utf8_utf16 decodes surrogate pairs; codecvt_utf8<char16_t> is UCS-2 only
+ // and mishandles characters outside the Basic Multilingual Plane.
+ std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
+ std::string sa(convert.to_bytes(s));
+ return VW_HashSpaceA(handle,sa.c_str());
+ }
+
+ VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashSpaceA(VW_HANDLE handle, const char * s)
{
try
{
@@ -102,7 +141,16 @@ extern "C"
}
}
- VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashFeature(VW_HANDLE handle, const char * s, unsigned long u)
+
+ // Wide-character entry point: converts the UTF-16 string to UTF-8
+ // and forwards it to VW_HashFeatureA.
+ // handle: passed through unchanged to VW_HashFeatureA.
+ // s: null-terminated UTF-16 string.
+ // u: passed through unchanged to VW_HashFeatureA.
+ // Returns the value VW_HashFeatureA computes for the UTF-8 form of s.
+ VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashFeature(VW_HANDLE handle, const char16_t * s, unsigned long u)
+ {
+ // codecvt_utf8_utf16 decodes surrogate pairs; codecvt_utf8<char16_t> is UCS-2 only
+ // and mishandles characters outside the Basic Multilingual Plane.
+ std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
+ std::string sa(convert.to_bytes(s));
+ return VW_HashFeatureA(handle,sa.c_str(),u);
+ }
+
+
+ VW_DLL_MEMBER uint32_t VW_CALLING_CONV VW_HashFeatureA(VW_HANDLE handle, const char * s, unsigned long u)
{
try
{