Small script to re-encode files that need it to UTF-8
author Richard Levitte <levitte@openssl.org>
Mon, 13 Jul 2015 14:50:16 +0000 (16:50 +0200)
committer Richard Levitte <levitte@openssl.org>
Mon, 13 Jul 2015 23:13:52 +0000 (01:13 +0200)
This requires 'iconv' and that 'file' can take the options '-b' and '-i'.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(cherry picked from commit f608b4064d58ca4dfdfdfc921308b51cb96205e2)

util/toutf8.sh [new file with mode: 0644]

diff --git a/util/toutf8.sh b/util/toutf8.sh
new file mode 100644 (file)
index 0000000..8a4254b
--- /dev/null
@@ -0,0 +1,17 @@
+#! /bin/sh
+#
+# Very simple script to detect and convert files that we want to re-encode to UTF8
+
+git ls-tree -r --name-only HEAD | \
+    while read F; do
+       charset=`file -bi "$F" | sed -e 's|.*charset=||'`
+       if [ "$charset" != "utf-8" -a "$charset" != "binary" -a "$charset" != "us-ascii" ]; then
+           iconv -f ISO-8859-1 -t UTF8 < "$F" > "$F.utf8" && \
+               ( cmp -s "$F" "$F.utf8" || \
+                       ( echo "$F"
+                         mv "$F" "$F.iso-8859-1"
+                         mv "$F.utf8" "$F"
+                       )
+               )
+       fi
+    done