From bb92e2c89b4aee9e1d1bb27a4a6da3817c66d005 Mon Sep 17 00:00:00 2001
From: Andy Polyakov <appro@openssl.org>
Date: Wed, 28 Apr 2010 20:02:28 +0000
Subject: [PATCH] bss_file.c: refine UTF-8 logic on Windows.

---
 crypto/bio/bss_file.c     | 53 ++++++++++++++++++++++++++-------------
 doc/crypto/BIO_s_file.pod |  4 +++
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/crypto/bio/bss_file.c b/crypto/bio/bss_file.c
index 3f458a0c7c..8bfa0bcd97 100644
--- a/crypto/bio/bss_file.c
+++ b/crypto/bio/bss_file.c
@@ -118,28 +118,45 @@ static BIO_METHOD methods_filep=
 
 BIO *BIO_new_file(const char *filename, const char *mode)
 	{
-	BIO *ret;
-	FILE *file;
+	BIO  *ret;
+	FILE *file=NULL;
 
-	file=fopen(filename,mode);	
 #if defined(_WIN32) && defined(CP_UTF8)
-	if (file==NULL && errno==ENOENT) /* see if filename is UTF-8 encoded */
+	int sz, len_0 = (int)strlen(filename)+1;
+
+	/*
+	 * Basically there are three cases to cover: a) filename is
+	 * a pure ASCII string; b) filename is an actual UTF-8 encoded
+	 * string; and c) filename is a locale-ized string, i.e. one
+	 * containing 8-bit characters that are meaningful in the
+	 * current system locale. If filename is pure ASCII or a real
+	 * UTF-8 encoded string, MultiByteToWideChar succeeds and
+	 * _wfopen works. If filename is a locale-ized string, chances
+	 * are that MultiByteToWideChar fails, reporting
+	 * ERROR_NO_UNICODE_TRANSLATION, in which case we fall
+	 * back to fopen...
+	 */
+	if ((sz=MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,
+					filename,len_0,NULL,0))>0)
 		{
-		int sz,len_0 = (int)strlen(filename)+1;
-		if ((sz=MultiByteToWideChar(CP_UTF8,0,filename,len_0,
-					    NULL,0))>0)
-			{
-			WCHAR wmode[8];
-			WCHAR *wfilename = _alloca(sz*sizeof(WCHAR));
-
-			if (MultiByteToWideChar(CP_UTF8,0,filename,len_0,
-						wfilename,sz) &&
-			    MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1,
-			    			wmode,sizeof(wmode)/sizeof(wmode[0]))
-			   )
-				file = _wfopen(wfilename,wmode);
-			}
+		WCHAR  wmode[8];
+		WCHAR *wfilename = _alloca(sz*sizeof(WCHAR));
+
+		if (MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,
+					filename,len_0,wfilename,sz) &&
+		    MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1,
+			    		wmode,sizeof(wmode)/sizeof(wmode[0])) &&
+		    (file=_wfopen(wfilename,wmode))==NULL && errno==ENOENT
+		   )	/* UTF-8 decode succeeded, but no file, filename
+			 * could still have been locale-ized... */
+			file = fopen(filename,mode);
+		}
+	else if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION)
+		{
+		file = fopen(filename,mode);
 		}
+#else
+	file=fopen(filename,mode);	
 #endif
 	if (file == NULL)
 		{
diff --git a/doc/crypto/BIO_s_file.pod b/doc/crypto/BIO_s_file.pod
index b2a29263f4..188aea347d 100644
--- a/doc/crypto/BIO_s_file.pod
+++ b/doc/crypto/BIO_s_file.pod
@@ -76,6 +76,10 @@ normally be closed so the BIO_NOCLOSE flag should be set.
 Because the file BIO calls the underlying stdio functions any quirks
 in stdio behaviour will be mirrored by the corresponding BIO.
 
+On Windows BIO_new_file() reserves the right to interpret the filename
+argument as UTF-8 encoded. In other words, if you have to make it work in
+a multilingual environment, encode file names in UTF-8.
+
 =head1 EXAMPLES
 
 File BIO "hello world":
-- 
2.34.1