git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - scripts/guess-charset
5.1-stable patches
[thirdparty/kernel/stable-queue.git] / scripts / guess-charset
1 #! /usr/bin/env python
2 # vim: set fileencoding=utf-8
3 # (c) Uwe Kleine-König <ukleine@strlen.de>
4 # GPLv2
5
6 import locale
7 import sys
8
# Read the whole input as raw bytes: its encoding is exactly what this
# script tries to detect, so nothing may decode it on the way in.
# open() is the portable spelling of the old file() builtin, and the
# with-block guarantees the handle is closed again.
with open(sys.argv[1], 'rb') as f:
    data = f.read()
11
def len_utf8_char(data):
    """Return the length in bytes of the UTF-8 sequence starting ``data``.

    ``data`` is a byte string; only its first (at most six) bytes are
    inspected.  Items may be 1-character strings (Python 2 ``str``) or
    integers (``bytes``/``bytearray``).

    Returns 1 for an ASCII byte, 2..6 for a lead byte followed by the
    required number of continuation bytes (0x80..0xbf), and -1 when no
    well-formed sequence starts here: empty input, a stray continuation
    byte, an impossible lead byte (0xfe/0xff), a wrong continuation byte,
    or a sequence cut off by the end of the input.

    The 5- and 6-byte lead patterns are kept so that input accepted before
    is still accepted; overlong encodings are not rejected either.
    """
    def byte_at(index):
        # Indexing yields a 1-char string or an int depending on the
        # byte-string type; normalise to an int.
        item = data[index]
        return item if isinstance(item, int) else ord(item)

    def check_cont(num):
        # A truncated sequence must be rejected explicitly: all() over a
        # too-short (or empty) range of continuation bytes would be True.
        if len(data) < num:
            return -1
        if all(0x80 <= byte_at(k) <= 0xbf for k in range(1, num)):
            return num
        return -1

    if not data:
        return -1

    lead = byte_at(0)
    if lead < 128:
        # ASCII char
        return 1

    # (mask, expected high bits, total sequence length) per lead-byte class.
    lead_patterns = (
        (0xe0, 0xc0, 2),
        (0xf0, 0xe0, 3),
        (0xf8, 0xf0, 4),
        (0xfc, 0xf8, 5),
        (0xfe, 0xfc, 6),
    )
    for mask, pattern, length in lead_patterns:
        if lead & mask == pattern:
            return check_cont(length)

    # Stray continuation byte (0x80..0xbf) or 0xfe/0xff: never a valid
    # start of a sequence.  Return -1 instead of falling through to None.
    return -1
32
# Walk the input one UTF-8 sequence at a time.  The first malformed
# sequence proves the data is not UTF-8, so a legacy 8-bit charset is
# reported and the script stops right there.
i = 0
maxl = 0  # longest sequence seen so far; stays <= 1 for pure ASCII input
while i < len(data):
    # len_utf8_char never looks past the first 6 bytes, so pass a bounded
    # window instead of copying the whole remainder on every iteration.
    char_len = len_utf8_char(data[i:i + 6])
    # len_utf8_char may fall through without a return value for bytes
    # that cannot start a sequence; treat that like the explicit -1
    # rather than relying on how None compares with 0.
    if char_len is None or char_len < 0:
        prefenc = locale.getpreferredencoding()
        # A UTF-8 or plain-ASCII locale cannot describe this data, so
        # fall back to ISO-8859-1; otherwise trust the locale's charset.
        if prefenc not in ('UTF-8', 'ANSI_X3.4-1968'):
            print(prefenc)
        else:
            print('ISO-8859-1')
        sys.exit(0)

    if maxl < char_len:
        maxl = char_len
    i += char_len

# Everything decoded cleanly: a multi-byte sequence means real UTF-8,
# otherwise the input (possibly empty) is plain ASCII.
if maxl > 1:
    print('UTF-8')
else:
    print('ANSI_X3.4-1968')