From 5a3ce86e21715a683ff0d32421ed5c6d5e84234d Mon Sep 17 00:00:00 2001
From: Richard Levitte
Date: Mon, 13 Jul 2015 16:50:16 +0200
Subject: [PATCH] Small script to re-encode files that need it to UTF-8

This requires 'iconv' and that 'file' can take the options '-b' and '-i'.

Reviewed-by: Rich Salz
(cherry picked from commit f608b4064d58ca4dfdfdfc921308b51cb96205e2)
---
Review note: the script below is a revised version of the one in the commit
named above, so the blob id on the "index" line no longer matches its content.

 util/toutf8.sh | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 util/toutf8.sh

diff --git a/util/toutf8.sh b/util/toutf8.sh
new file mode 100644
index 0000000000..8a4254b3df
--- /dev/null
+++ b/util/toutf8.sh
@@ -0,0 +1,37 @@
+#! /bin/sh
+#
+# Very simple script to detect and convert files that we want to re-encode
+# to UTF-8.
+#
+# Every regular file listed by 'git ls-tree' for HEAD is probed with
+# 'file -bi'.  A file whose reported charset is anything other than utf-8,
+# us-ascii or binary is treated as ISO-8859-1 and run through 'iconv'.  If
+# that changes the content, the file name is printed, the original is kept
+# as FILE.iso-8859-1 and the converted text replaces FILE.
+#
+# Requires 'iconv' and a 'file' that accepts the options '-b' and '-i'.
+
+# core.quotePath=false makes git print non-ASCII path names verbatim instead
+# of as quoted, backslash-escaped strings that could not be opened below.
+git -c core.quotePath=false ls-tree -r --name-only HEAD |
+    while IFS= read -r F; do
+        # Paths tracked at HEAD need not exist as regular files in the
+        # work tree (deleted files, submodules); there is nothing to probe.
+        [ -f "$F" ] || continue
+
+        charset=$(file -bi -- "$F" | sed -e 's|.*charset=||')
+        case "$charset" in
+            utf-8 | binary | us-ascii) continue ;;
+        esac
+
+        # Convert into a scratch file next to the original; drop it again
+        # when the conversion fails or turns out to change nothing.
+        if iconv -f ISO-8859-1 -t UTF-8 < "$F" > "$F.utf8" &&
+            ! cmp -s -- "$F" "$F.utf8"; then
+            printf '%s\n' "$F"
+            # Only overwrite the original once its backup is in place.
+            mv -- "$F" "$F.iso-8859-1" && mv -- "$F.utf8" "$F"
+        else
+            rm -f -- "$F.utf8"
+        fi
+    done
-- 
2.34.1