From f11a6d3a847e8e18faefd8694373d2f11b5ec802 Mon Sep 17 00:00:00 2001 From: Alexander Kuznetsov Date: Tue, 20 Mar 2012 02:17:37 +0000 Subject: Adds support for utf paths on Windows. Not all file formats/calls are supported yet. It will be expanded. Please from now on use BLI_fopen, BLI_* for file manipulations. For non-windows systems BLI_fopen just calls fopen. For Windows, the utf-8 string is translated to utf-16 string in order to call UTF version of the function. --- intern/utfconv/utfconv.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 intern/utfconv/utfconv.c (limited to 'intern/utfconv/utfconv.c') diff --git a/intern/utfconv/utfconv.c b/intern/utfconv/utfconv.c new file mode 100644 index 00000000000..9aeca36e55a --- /dev/null +++ b/intern/utfconv/utfconv.c @@ -0,0 +1,232 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. 
+ * + * Contributor(s): Alexandr Kuznetsov, Andrea Weikert + * + * ***** END GPL LICENSE BLOCK ***** + */ + +#include "utfconv.h" + +size_t count_utf_8_from_16(wchar_t * string16) +{ + int i; + size_t count = 0; + wchar_t u = 0; + if(!string16) return 0; + + for(i=0;u = string16[i];i++) + { + if(u < 0x0080) count+=1; else + if(u < 0x0800) count+=2; else + if(u < 0xD800) count+=3; else + if(u < 0xDC00) { + i++; + if((u = string16[i])==0) break; + if(u >= 0xDC00 && u < 0xE000)count+=4; + } else + if(u < 0xE000) /*illigal*/; else + count+=3; + } + + return ++count; +} + + +size_t count_utf_16_from_8(char * string8) +{ + size_t count = 0; + char u; + char type = 0; + unsigned int u32 = 0; + + if(!string8) return 0; + + for(;(u = *string8);string8++) + { + if(type==0) + { + if((u&0x01<<7) == 0) {count++; u32 = 0; continue;} //1 utf-8 char + if((u&0x07<<5) == 0xC0) {type=1; u32 = u & 0x1F; continue;} //2 utf-8 char + if((u&0x0F<<4) == 0xE0) {type=2; u32 = u & 0x0F; continue;} //3 utf-8 char + if((u&0x1F<<3) == 0xF0) {type=3; u32 = u & 0x07; continue;} //4 utf-8 char + continue; + } else + { + if((u & 0xC0) == 0x80) {u32=(u32<<6) | (u&0x3F); type--;} else + {u32 = 0; type = 0;}; + } + if(type==0) + { + if((0 < u32 && u32 < 0xD800) || (0xE000 <= u32 && u32 < 0x10000)) count++; else + if(0x10000 <= u32 && u32 < 0x110000) count+=2; + u32 = 0; + } + + } + + return ++count; +} + + + + +int conv_utf_16_to_8(wchar_t * in16, char * out8, size_t size8) +{ + char * out8end = out8+size8; + wchar_t u = 0; + int err = 0; + if(!size8 || !in16 || !out8) return UTF_ERROR_NULL_IN; + out8end--; + + for(; out8 < out8end && (u=*in16); in16++, out8++) + { + if(u < 0x0080) *out8 = u; else + if(u < 0x0800) { + if(out8 + 1 >= out8end) break; + *out8++=(0x3<<6) | (0x1F & (u>>6)); + *out8 =(0x1<<7) | (0x3F & (u)); + }else + if(u < 0xD800 || u >= 0xE000) { + if(out8 + 2 >= out8end) break; + *out8++=(0x7<<5) | (0xF & (u>>12)); + *out8++=(0x1<<7) | (0x3F & (u>>6));; + *out8 =(0x1<<7) | (0x3F & (u)); 
+ }else + if(u < 0xDC00) { + wchar_t u2 = *++in16; + + if(!u2) break; + if(u2 >= 0xDC00 && u2 < 0xE000) + { + if(out8 + 3 >= out8end) break; else { + unsigned int uc = 0x10000 + (u2 - 0xDC00) + ((u - 0xD800)<<10); + + *out8++=(0xF<<4) | (0x7 & (uc>>18)); + *out8++=(0x1<<7) | (0x3F & (uc>>12)); + *out8++=(0x1<<7) | (0x3F & (uc>>6)); + *out8 =(0x1<<7) | (0x3F & (uc)); + } + } else {out8--; err|=UTF_ERROR_ILLCHAR;}; + } else + if(u < 0xE000) {out8--; err|=UTF_ERROR_ILLCHAR;} + + + } + + *out8=*out8end=0; + + if(*in16) err|=UTF_ERROR_SMALL; + + return err; +} + + +int conv_utf_8_to_16(char * in8, wchar_t * out16, size_t size16) +{ + char u; + char type = 0; + wchar_t u32 = 0; + wchar_t * out16end = out16+size16; + int err = 0; + if(!size16 || !in8 || !out16) return UTF_ERROR_NULL_IN; + out16end--; + + for(;out16= out16end) break; + u32-=0x10000; + *out16 = 0xD800 + (u32 >> 10); + out16++; + *out16 = 0xDC00 + (u32 & 0x3FF); + out16++; + }; + u32 = 0; + } + + } + + *out16=*out16end=0; + + if(*in8) err|=UTF_ERROR_SMALL; + + return err; +} + +int is_ascii(char * in8) +{ + for(in8; *in8; in8++) + if(0x80 & *in8) return 0; + + return 1; +} + +void utf_8_cut_end(char * inout8, size_t maxcutpoint) +{ + const char * start = inout8; + char * cur = inout8 + maxcutpoint; + char cc; + if(!inout8) return; + + cc = *cur; + + + + +} + + + +char * alloc_utf_8_from_16(wchar_t * in16, size_t add) +{ + size_t bsize = count_utf_8_from_16(in16); + char * out8 = NULL; + if(!bsize) return NULL; + out8 = (char*)malloc(sizeof(char) * (bsize + add)); + conv_utf_16_to_8(in16,out8, bsize); + return out8; +} + +wchar_t * alloc_utf16_from_8(char * in8, size_t add) +{ + size_t bsize = count_utf_16_from_8(in8); + wchar_t * out16 = NULL; + if(!bsize) return NULL; + out16 =(wchar_t*) malloc(sizeof(wchar_t) * (bsize + add)); + conv_utf_8_to_16(in8,out16, bsize); + return out16; +} + -- cgit v1.2.3