#! /usr/bin/env python
# vim: set fileencoding=utf-8
# (c) Uwe Kleine-König
# GPLv2
"""Guess the character encoding of a file and print its name.

Usage: pass the file to inspect as the first command-line argument.

The whole file is scanned as a sequence of UTF-8 encoded characters:

* only single-byte (ASCII) characters -> ``ANSI_X3.4-1968``
* valid sequences, at least one of them multi-byte -> ``UTF-8``
* any invalid sequence -> the locale's preferred encoding, unless that
  is itself UTF-8 or ASCII, in which case ``ISO-8859-1`` is printed.
"""

import codecs
import locale
import sys

# (mask, expected bits, sequence length) for every accepted lead byte.
# Lengths 5 and 6 come from the original UTF-8 definition (RFC 2279);
# they are still accepted here, exactly as this script always did.
_LEAD_BYTE_FORMS = (
    (0x80, 0x00, 1),
    (0xe0, 0xc0, 2),
    (0xf0, 0xe0, 3),
    (0xf8, 0xf0, 4),
    (0xfc, 0xf8, 5),
    (0xfe, 0xfc, 6),
)
_MAX_SEQUENCE_LENGTH = 6

# Encoding names this script prints for the two "clean" outcomes.
_ASCII_NAME = 'ANSI_X3.4-1968'
_UTF8_NAME = 'UTF-8'


def len_utf8_char(data, pos=0):
    """Return the byte length of the UTF-8 sequence starting at ``data[pos]``.

    ``data`` is a byte string (``str`` on Python 2, ``bytes`` or
    ``bytearray`` on Python 3) and ``pos`` the offset of the lead byte.

    Returns 1 to 6 for a well-formed sequence and -1 when the lead byte
    is not a valid start byte, a continuation byte is outside
    0x80..0xBF, or the data ends before the sequence is complete.

    Raises IndexError when ``pos`` is at or beyond the end of ``data``.
    """
    # Copy at most one sequence worth of bytes; bytearray yields ints on
    # both Python 2 and Python 3, so no ord() juggling is needed.
    window = bytearray(data[pos:pos + _MAX_SEQUENCE_LENGTH])
    lead = window[0]

    for mask, expected, length in _LEAD_BYTE_FORMS:
        if lead & mask == expected:
            break
    else:
        # Stray continuation byte (0x80..0xBF) or 0xFE/0xFF.
        return -1

    continuation = window[1:length]
    if len(continuation) != length - 1:
        # Sequence is cut off by the end of the data.
        return -1
    if all(0x80 <= byte <= 0xbf for byte in continuation):
        return length
    return -1


def _fallback_encoding():
    """Return the encoding name to report for data that is not UTF-8.

    This is the locale's preferred encoding, except when that encoding is
    UTF-8 or ASCII (which the data has just been shown not to be); then
    ``ISO-8859-1`` is returned.
    """
    preferred = locale.getpreferredencoding()
    if preferred.upper() in (_UTF8_NAME, _ASCII_NAME):
        return 'ISO-8859-1'
    try:
        # Normalise aliases and case ('utf8', 'utf-8', 'US-ASCII', ...).
        canonical = codecs.lookup(preferred).name
    except LookupError:
        # Unknown to Python: report the locale's name unchanged.
        return preferred
    if canonical in ('utf-8', 'ascii'):
        return 'ISO-8859-1'
    return preferred


def guess_encoding(data):
    """Return the name of the encoding ``data`` (a byte string) appears to use.

    See the module docstring for the three possible outcomes.  Empty
    input is reported as ``ANSI_X3.4-1968``.
    """
    data = bytearray(data)
    size = len(data)
    pos = 0
    longest = 0
    while pos < size:
        length = len_utf8_char(data, pos)
        if length < 0:
            return _fallback_encoding()
        if length > longest:
            longest = length
        pos += length
    if longest > 1:
        return _UTF8_NAME
    return _ASCII_NAME


def main():
    """Read the file named by ``sys.argv[1]`` and print its guessed encoding."""
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s FILE\n' % sys.argv[0])
        return 2
    # Binary mode: the raw bytes are inspected, no decoding must happen.
    with open(sys.argv[1], 'rb') as stream:
        data = stream.read()
    print(guess_encoding(data))
    return 0


if __name__ == '__main__':
    sys.exit(main())