Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/winsup
diff options
context:
space:
mode:
authorKeith Marshall <keithmarshall@@users.sf.net>2007-03-09 02:15:58 +0300
committerKeith Marshall <keithmarshall@@users.sf.net>2007-03-09 02:15:58 +0300
commitaf8e63023a915511a640aaa6e22c96cf9fe10ba8 (patch)
tree9fe6d564b3823d9313cdc5d63fffeb956d5d68f9 /winsup
parent062bfdacdb630fd2cc36bf45edb3041337bfc957 (diff)
Make basename and dirname functions work with path names
containing multibyte character strings.
Diffstat (limited to 'winsup')
-rw-r--r--winsup/mingw/ChangeLog6
-rwxr-xr-xwinsup/mingw/mingwex/basename.c116
-rwxr-xr-xwinsup/mingw/mingwex/dirname.c167
3 files changed, 210 insertions, 79 deletions
diff --git a/winsup/mingw/ChangeLog b/winsup/mingw/ChangeLog
index f225db48f..b0c559ae0 100644
--- a/winsup/mingw/ChangeLog
+++ b/winsup/mingw/ChangeLog
@@ -1,3 +1,9 @@
+2007-03-08 Keith Marshall <keithmarshall@users.sourceforge.net>
+
+ * mingwex/basename.c: Make it work with path names containing
+ multibyte character strings.
+ * mingwex/dirname.c: Likewise.
+
2007-03-05 Danny Smith <dannysmith@users.sourceforge.net>
* include/io.h (__mingw_access): New static inline wrapper to restore
diff --git a/winsup/mingw/mingwex/basename.c b/winsup/mingw/mingwex/basename.c
index 768b9a0f4..c531cab58 100755
--- a/winsup/mingw/mingwex/basename.c
+++ b/winsup/mingw/mingwex/basename.c
@@ -20,8 +20,10 @@
*/
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <libgen.h>
+#include <locale.h>
#ifndef __cdecl /* If compiling on any non-Win32 platform ... */
#define __cdecl /* this may not be defined. */
@@ -29,61 +31,114 @@
__cdecl char *basename( char *path )
{
- char *retname;
- static char retfail[] = ".";
+ size_t len;
+ static char *retfail = NULL;
+
+ /* to handle path names for files in multibyte character locales,
+ * we need to set up LC_CTYPE to match the host file system locale
+ */
+
+ char *locale = setlocale( LC_CTYPE, NULL );
+ if( locale != NULL ) locale = strdup( locale );
+ setlocale( LC_CTYPE, "" );
if( path && *path )
{
- /* step over the drive designator, if present ...
- * (FIXME: maybe should confirm *path is a valid drive designator).
+ /* allocate sufficient local storage space,
+ * in which to create a wide character reference copy of path
+ */
+
+ wchar_t refcopy[1 + (len = mbstowcs( NULL, path, 0 ))];
+
+ /* create the wide character reference copy of path,
+ * and step over the drive designator, if present ...
*/
- if( path[1] == ':' )
- path += 2;
+ wchar_t *refpath = refcopy;
+ if( ((len = mbstowcs( refpath, path, len )) > 1) && (refpath[1] == L':') )
+ {
+ /* FIXME: maybe should confirm *refpath is a valid drive designator */
+
+ refpath += 2;
+ }
+
+ /* ensure that our wide character reference path is NUL terminated */
+
+ refcopy[ len ] = L'\0';
/* check again, just to ensure we still have a non-empty path name ... */
- if( *path )
+ if( *refpath )
{
- /* and, when we do ...
- * scan from left to right, to the char after the final dir separator
+ /* and, when we do, process it in the wide character domain ...
+ * scanning from left to right, to the char after the final dir separator
*/
- for( retname = path ; *path ; ++path )
+ wchar_t *refname;
+ for( refname = refpath ; *refpath ; ++refpath )
{
- if( (*path == '/') || (*path == '\\') )
+ if( (*refpath == L'/') || (*refpath == L'\\') )
{
/* we found a dir separator ...
* step over it, and any others which immediately follow it
*/
- while( (*path == '/') || (*path == '\\') )
- ++path;
+ while( (*refpath == L'/') || (*refpath == L'\\') )
+ ++refpath;
/* if we didn't reach the end of the path string ... */
- if( *path )
+ if( *refpath )
/* then we have a new candidate for the base name */
- retname = path;
+ refname = refpath;
/* otherwise ...
* strip off any trailing dir separators which we found
*/
- else while( (path > retname) && ((*--path == '/') || (*path == '\\')) )
- *path = '\0';
+ else while( (refpath > refname)
+ && ((*--refpath == L'/') || (*refpath == L'\\')) )
+ *refpath = L'\0';
}
}
- /* retname now points at the resolved base name ...
- * if it's not empty, then we return it as it is, otherwise ...
- * we must have had only dir separators in the original path name,
- * so we return "/".
+ /* in the wide character domain ...
+ * refname now points at the resolved base name ...
*/
- return *retname ? retname : strcpy( retfail, "/" );
+ if( *refname )
+ {
+ /* if it's not empty,
+ * then we transform the full normalised path back into
+ * the multibyte character domain, and skip over the dirname,
+ * to return the resolved basename.
+ */
+
+ if( (len = wcstombs( path, refcopy, len )) != (size_t)(-1) )
+ path[ len ] = '\0';
+ *refname = L'\0';
+ if( (len = wcstombs( NULL, refcopy, 0 )) != (size_t)(-1) )
+ path += len;
+ }
+
+ else
+ {
+ /* the basename is empty, so return the default value of "/",
+ * transforming from wide char to multibyte char domain, and
+ * returning it in our own buffer.
+ */
+
+ retfail = realloc( retfail, len = 1 + wcstombs( NULL, L"/", 0 ));
+ wcstombs( path = retfail, L"/", len );
+ }
+
+ /* restore the caller's locale, clean up, and return the result */
+
+ setlocale( LC_CTYPE, locale );
+ free( locale );
+ return( path );
}
/* or we had an empty residual path name, after the drive designator,
@@ -93,11 +148,20 @@ __cdecl char *basename( char *path )
/* and, if we get to here ...
* the path name is either NULL, or it decomposes to an empty string;
- * in either case, we return the default value of "." in our static buffer,
- * (but strcpy it, just in case the caller trashed it after a previous call).
+ * in either case, we return the default value of "." in our own buffer,
+ * reloading it with the correct value, transformed from the wide char
+ * to the multibyte char domain, just in case the caller trashed it
+ * after a previous call.
*/
- return strcpy( retfail, "." );
+ retfail = realloc( retfail, len = 1 + wcstombs( NULL, L".", 0 ));
+ wcstombs( retfail, L".", len );
+
+ /* restore the caller's locale, clean up, and return the result */
+
+ setlocale( LC_CTYPE, locale );
+ free( locale );
+ return( retfail );
}
-/* $RCSfile$: end of file */
+/* $RCSfile$$Revision$: end of file */
diff --git a/winsup/mingw/mingwex/dirname.c b/winsup/mingw/mingwex/dirname.c
index 44256b19c..20d7a30bb 100755
--- a/winsup/mingw/mingwex/dirname.c
+++ b/winsup/mingw/mingwex/dirname.c
@@ -20,8 +20,10 @@
*/
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <libgen.h>
+#include <locale.h>
#ifndef __cdecl /* If compiling on any non-Win32 platform ... */
#define __cdecl /* this may not be defined. */
@@ -29,12 +31,30 @@
__cdecl char *dirname( char *path )
{
- static char retfail[] = "?:.";
- char *retname, *basename, *copyptr = retfail;
+ size_t len;
+ static char *retfail = NULL;
+
+ /* to handle path names for files in multibyte character locales,
+ * we need to set up LC_CTYPE to match the host file system locale.
+ */
+
+ char *locale = setlocale( LC_CTYPE, NULL );
+ if( locale != NULL ) locale = strdup( locale );
+ setlocale( LC_CTYPE, "" );
if( path && *path )
{
- retname = path;
+ /* allocate sufficient local storage space,
+ * in which to create a wide character reference copy of path
+ */
+
+ wchar_t refcopy[1 + (len = mbstowcs( NULL, path, 0 ))];
+
+ /* create the wide character reference copy of path */
+
+ wchar_t *refpath = refcopy;
+ len = mbstowcs( refpath, path, len );
+ refcopy[ len ] = L'\0';
/* SUSv3 identifies a special case, where path is exactly equal to "//";
* (we will also accept "\\" in the Win32 context, but not "/\" or "\/",
@@ -44,48 +64,55 @@ __cdecl char *dirname( char *path )
* simply return the path unchanged, (i.e. "//" or "\\").
*/
- if( (*path == '/') || (*path == '\\') )
+ if( (len > 1) && ((refpath[0] == L'/') || (refpath[0] == L'\\')) )
{
- if( (path[1] == *retname) && (path[2] == '\0') )
- return retname;
+ if( (refpath[1] == refpath[0]) && (refpath[2] == L'\0') )
+ {
+ setlocale( LC_CTYPE, locale );
+ free( locale );
+ return path;
+ }
}
/* For all other cases ...
- * step over the drive designator, if present, copying it to retfail ...
- * (FIXME: maybe should confirm *path is a valid drive designator).
+ * step over the drive designator, if present ...
*/
- else if( *path && (path[1] == ':') )
+ else if( (len > 1) && (refpath[1] == L':') )
{
- *copyptr++ = *path++;
- *copyptr++ = *path++;
+ /* FIXME: maybe should confirm *refpath is a valid drive designator */
+
+ refpath += 2;
}
- if( *path )
+ /* check again, just to ensure we still have a non-empty path name ... */
+
+ if( *refpath )
{
/* reproduce the scanning logic of the "basename" function
* to locate the basename component of the current path string,
* (but also remember where the dirname component starts).
*/
- for( retname = basename = path ; *path ; ++path )
+ wchar_t *refname, *basename;
+ for( refname = basename = refpath ; *refpath ; ++refpath )
{
- if( (*path == '/') || (*path == '\\') )
+ if( (*refpath == L'/') || (*refpath == L'\\') )
{
/* we found a dir separator ...
* step over it, and any others which immediately follow it
*/
- while( (*path == '/') || (*path == '\\') )
- ++path;
+ while( (*refpath == L'/') || (*refpath == L'\\') )
+ ++refpath;
/* if we didn't reach the end of the path string ... */
- if( *path )
+ if( *refpath )
/* then we have a new candidate for the base name */
- basename = path;
+ basename = refpath;
else
@@ -102,73 +129,107 @@ __cdecl char *dirname( char *path )
* to confirm that we have distinct dirname and basename components
*/
- if( basename > retname )
+ if( basename > refname )
{
/* and, when we do ...
* backtrack over all trailing separators on the dirname component,
* (but preserve exactly two initial dirname separators, if identical),
- * and add a NULL terminator in their place.
+ * and add a NUL terminator in their place.
*/
- --basename;
- while( (basename > retname) && ((*basename == '/') || (*basename == '\\')) )
- --basename;
- if( (basename == retname) && ((*retname == '/') || (*retname == '\\'))
- && (retname[1] == *retname) && (retname[2] != '/') && (retname[2] != '\\') )
+ do --basename;
+ while( (basename > refname) && ((*basename == L'/') || (*basename == L'\\')) );
+ if( (basename == refname) && ((refname[0] == L'/') || (refname[0] == L'\\'))
+ && (refname[1] == refname[0]) && (refname[2] != L'/') && (refname[2] != L'\\') )
++basename;
- *++basename = '\0';
-
- /* adjust the start point of the dirname,
- * to accommodate the Win32 drive designator, if it was present.
- */
-
- if( copyptr > retfail )
- retname -= 2;
+ *++basename = L'\0';
/* if the resultant dirname begins with EXACTLY two dir separators,
* AND both are identical, then we preserve them.
*/
- path = copyptr = retname;
- while( ((*path == '/') || (*path == '\\')) )
- ++path;
- if( ((path - retname) == 2) && (*++copyptr == *retname) )
- ++copyptr;
+ refpath = refcopy;
+ while( ((*refpath == L'/') || (*refpath == L'\\')) )
+ ++refpath;
+ if( ((refpath - refcopy) > 2) || (refcopy[1] != refcopy[0]) )
+ refpath = refcopy;
/* and finally ...
* we remove any residual, redundantly duplicated separators from the dirname,
* reterminate, and return it.
*/
- path = copyptr;
- while( *path )
+ refname = refpath;
+ while( *refpath )
{
- if( ((*copyptr++ = *path) == '/') || (*path++ == '\\') )
+ if( ((*refname++ = *refpath) == L'/') || (*refpath++ == L'\\') )
{
- while( (*path == '/') || (*path == '\\') )
- ++path;
+ while( (*refpath == L'/') || (*refpath == L'\\') )
+ ++refpath;
}
}
- *copyptr = '\0';
- return retname;
+ *refname = L'\0';
+
+ /* finally ...
+ * transform the resolved dirname back into the multibyte char domain,
+ * restore the caller's locale, and return the resultant dirname
+ */
+
+ if( (len = wcstombs( path, refcopy, len )) != (size_t)(-1) )
+ path[ len ] = '\0';
}
- else if( (*retname == '/') || (*retname == '\\') )
+ else
{
- *copyptr++ = *retname;
- *copyptr = '\0';
- return retfail;
+ /* either there were no dirname separators in the path name,
+ * or there was nothing else ...
+ */
+
+ if( (*refname == L'/') || (*refname == L'\\') )
+ {
+ /* it was all separators, so return one */
+
+ ++refname;
+ }
+
+ else
+ {
+ /* there were no separators, so return '.' */
+
+ *refname++ = L'.';
+ }
+
+ /* add a NUL terminator, in either case,
+ * then transform to the multibyte char domain,
+ * using our own buffer
+ */
+
+ *refname = L'\0';
+ retfail = realloc( retfail, len = 1 + wcstombs( NULL, refcopy, 0 ));
+ wcstombs( path = retfail, refcopy, len );
}
+
+ /* restore caller's locale, clean up, and return the resolved dirname */
+
+ setlocale( LC_CTYPE, locale );
+ free( locale );
+ return path;
}
}
/* path is NULL, or an empty string; default return value is "." ...
- * return this in our own static buffer, but strcpy it, just in case
- * the caller trashed it after a previous call.
+ * return this in our own buffer, regenerated by wide char transform,
+ * in case the caller trashed it after a previous call.
*/
- strcpy( copyptr, "." );
+ retfail = realloc( retfail, len = 1 + wcstombs( NULL, L".", 0 ));
+ wcstombs( retfail, L".", len );
+
+ /* restore caller's locale, clean up, and return the default dirname */
+
+ setlocale( LC_CTYPE, locale );
+ free( locale );
return retfail;
}
-/* $RCSfile$: end of file */
+/* $RCSfile$$Revision$: end of file */