bss_file.c: refine UTF-8 logic on Windows.
author Andy Polyakov <appro@openssl.org>
Wed, 28 Apr 2010 20:02:28 +0000 (20:02 +0000)
committer Andy Polyakov <appro@openssl.org>
Wed, 28 Apr 2010 20:02:28 +0000 (20:02 +0000)
crypto/bio/bss_file.c
doc/crypto/BIO_s_file.pod

index 3f458a0c7cb3817faddd34c1efe4ca9928652d13..8bfa0bcd97d7a2a9052f4f63d1a9860f92a3cf53 100644 (file)
@@ -118,28 +118,45 @@ static BIO_METHOD methods_filep=
 
 BIO *BIO_new_file(const char *filename, const char *mode)
        {
-       BIO *ret;
-       FILE *file;
+       BIO  *ret;
+       FILE *file=NULL;
 
-       file=fopen(filename,mode);      
 #if defined(_WIN32) && defined(CP_UTF8)
-       if (file==NULL && errno==ENOENT) /* see if filename is UTF-8 encoded */
+       int sz, len_0 = (int)strlen(filename)+1;
+
+       /*
+        * Basically there are three cases to cover: a) filename is
+        * pure ASCII string; b) actual UTF-8 encoded string and
+        * c) locale-ized string, i.e. one containing 8-bit
+        * characters that are meaningful in current system locale.
+        * If filename is pure ASCII or real UTF-8 encoded string,
+        * MultiByteToWideChar succeeds and _wfopen works. If
+        * filename is locale-ized string, chances are that
+        * MultiByteToWideChar fails reporting
+        * ERROR_NO_UNICODE_TRANSLATION, in which case we fall
+        * back to fopen...
+        */
+       if ((sz=MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,
+                                       filename,len_0,NULL,0))>0)
                {
-               int sz,len_0 = (int)strlen(filename)+1;
-               if ((sz=MultiByteToWideChar(CP_UTF8,0,filename,len_0,
-                                           NULL,0))>0)
-                       {
-                       WCHAR wmode[8];
-                       WCHAR *wfilename = _alloca(sz*sizeof(WCHAR));
-
-                       if (MultiByteToWideChar(CP_UTF8,0,filename,len_0,
-                                               wfilename,sz) &&
-                           MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1,
-                                               wmode,sizeof(wmode)/sizeof(wmode[0]))
-                          )
-                               file = _wfopen(wfilename,wmode);
-                       }
+               WCHAR  wmode[8];
+               WCHAR *wfilename = _alloca(sz*sizeof(WCHAR));
+
+               if (MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,
+                                       filename,len_0,wfilename,sz) &&
+                   MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1,
+                                       wmode,sizeof(wmode)/sizeof(wmode[0])) &&
+                   (file=_wfopen(wfilename,wmode))==NULL && errno==ENOENT
+                  )    /* UTF-8 decode succeeded, but no file, filename
+                        * could still have been locale-ized... */
+                       file = fopen(filename,mode);
+               }
+       else if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION)
+               {
+               file = fopen(filename,mode);
                }
+#else
+       file=fopen(filename,mode);      
 #endif
        if (file == NULL)
                {
index b2a29263f4cd45d5eff2c47f5c506c1407a9e56b..188aea347daee06b721664b16d879485a9546e47 100644 (file)
@@ -76,6 +76,10 @@ normally be closed so the BIO_NOCLOSE flag should be set.
 Because the file BIO calls the underlying stdio functions any quirks
 in stdio behaviour will be mirrored by the corresponding BIO.
 
+On Windows BIO_new_file() reserves the right to treat the filename
+argument as UTF-8 encoded. In other words, if you have to make it work
+in a multilingual environment, encode file names in UTF-8.
+
 =head1 EXAMPLES
 
 File BIO "hello world":