[thirdparty/glibc.git] / localedata / unicode-gen / utf8_compatibility.py

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2014-2019 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.

'''
This script is useful for checking the backward compatibility of a newly
generated UTF-8 file produced by the utf8_gen.py script.

To see how this script is used, call it with the “-h” option:

    $ ./utf8_compatibility.py -h
    … prints usage message …
'''

import sys
import re
import argparse
import unicode_utils

def create_charmap_dictionary(file_name):
    '''Create a dictionary for all code points found in the CHARMAP
    section of a file

    Everything up to and including the first line starting with
    “CHARMAP” is skipped.  The following lines are parsed until a line
    starting with “END CHARMAP” is reached.  Lines starting with “%”
    and lines which do not look like “<Uxxxx> /xNN…” or
    “<Uxxxx>..<Uyyyy> /xNN…” are ignored.

    Returns a dictionary mapping each code point (as an integer) to
    the “/xNN…” string found on its line.  For a range line, every
    code point of the range (both ends included) is mapped to the
    string of that line.

    Writes a message to stderr and exits with status 1 if the
    “CHARMAP” line or the “END CHARMAP” line is missing.
    '''
    # The end of a range is an optional *non-capturing* group “(?:…)?”
    # and code points consist of upper case hex digits only, so that
    # everything which matches can safely be passed to int(…, 16).
    entry_pattern = re.compile(
        r'^<U(?P<codepoint1>[0-9A-F]{4,8})>'
        r'(?:\.\.<U(?P<codepoint2>[0-9A-F]{4,8})>)?'
        r'\s+(?P<hexutf8>(/x[0-9a-f]{2}){1,4})')
    # Explicit encoding, the result must not depend on the locale the
    # script happens to be run in.
    with open(file_name, mode='r', encoding='utf-8') as utf8_file:
        charmap_dictionary = {}
        # Skip everything in front of the CHARMAP section.
        for line in utf8_file:
            if line.startswith('CHARMAP'):
                break
        # Continues with the line after “CHARMAP”, both loops share
        # the same file iterator.
        for line in utf8_file:
            if line.startswith('END CHARMAP'):
                return charmap_dictionary
            if line.startswith('%'):
                continue
            match = entry_pattern.match(line)
            if not match:
                continue
            codepoint1 = match.group('codepoint1')
            # A single code point is treated as a range of length one.
            codepoint2 = match.group('codepoint2') or codepoint1
            hexutf8 = match.group('hexutf8')
            for i in range(int(codepoint1, 16),
                           int(codepoint2, 16) + 1):
                charmap_dictionary[i] = hexutf8
    # Only reached when the file ended before “END CHARMAP” was seen.
    sys.stderr.write('No “CHARMAP” or no “END CHARMAP” found in %s\n'
                     %file_name)
    sys.exit(1)

def check_charmap(original_file_name, new_file_name):
    '''Print a report comparing the CHARMAP sections of two files

    Prints how many code points were removed from, changed in and
    added to the CHARMAP section of the new file compared to the
    original file.  The affected code points themselves are listed as
    well if ARGS.show_missing_characters, ARGS.show_changed_characters
    or ARGS.show_added_characters, respectively, is set.
    '''
    def name_of(code_point):
        # The 'name' entry of unicode_utils.UNICODE_ATTRIBUTES if there
        # is one for this code point, the text “None” otherwise.
        if code_point in unicode_utils.UNICODE_ATTRIBUTES:
            return unicode_utils.UNICODE_ATTRIBUTES[code_point]['name']
        return 'None'

    separator = '------------------------------------------------------------'

    print('************************************************************')
    print('Report on CHARMAP:')
    old_map = create_charmap_dictionary(original_file_name)
    new_map = create_charmap_dictionary(new_file_name)
    old_keys = set(old_map)
    new_keys = set(new_map)

    # Code points which are only in the original file.
    print(separator)
    removed = old_keys - new_keys
    print('Total removed characters in newly generated CHARMAP: %d'
          %len(removed))
    if ARGS.show_missing_characters:
        for code_point in sorted(removed):
            print('removed: {:s}     {:s} {:s}'.format(
                unicode_utils.ucs_symbol(code_point),
                old_map[code_point],
                name_of(code_point)))

    # Code points which are in both files but with different bytes.
    print(separator)
    changed = {
        code_point: (old_map[code_point], new_map[code_point])
        for code_point in old_keys & new_keys
        if old_map[code_point] != new_map[code_point]
    }
    print('Total changed characters in newly generated CHARMAP: %d'
          %len(changed))
    if ARGS.show_changed_characters:
        for code_point in sorted(changed):
            old_bytes, new_bytes = changed[code_point]
            print('changed: {:s}     {:s}->{:s} {:s}'.format(
                unicode_utils.ucs_symbol(code_point),
                old_bytes,
                new_bytes,
                name_of(code_point)))

    # Code points which are only in the new file.
    print(separator)
    added = new_keys - old_keys
    print('Total added characters in newly generated CHARMAP: %d'
          %len(added))
    if ARGS.show_added_characters:
        for code_point in sorted(added):
            print('added: {:s}     {:s} {:s}'.format(
                unicode_utils.ucs_symbol(code_point),
                new_map[code_point],
                name_of(code_point)))

def create_width_dictionary(file_name):
    '''Create a dictionary for all code points found in the WIDTH
    section of a file

    Everything up to and including the first line starting with
    “WIDTH” is skipped.  The following lines are parsed until a line
    starting with “END WIDTH” is reached.  Lines which do not look
    like “<Uxxxx> W” or “<Uxxxx>...<Uyyyy> W”, with W being 0 or 2,
    are ignored.

    Returns a dictionary mapping each code point (as an integer) to
    its width (as an integer).  For a range line, every code point of
    the range (both ends included) gets the width of that line.

    Writes a message to stderr and exits with status 1 if the “WIDTH”
    line or the “END WIDTH” line is missing, the same way
    create_charmap_dictionary() handles a missing CHARMAP section.
    '''
    # The end of a range is an optional *non-capturing* group “(?:…)?”
    # and code points consist of upper case hex digits only, so that
    # everything which matches can safely be passed to int(…, 16).
    entry_pattern = re.compile(
        r'^<U(?P<codepoint1>[0-9A-F]{4,8})>'
        r'(?:\.\.\.<U(?P<codepoint2>[0-9A-F]{4,8})>)?'
        r'\s+(?P<width>[02])')
    # Explicit encoding, the result must not depend on the locale the
    # script happens to be run in.
    with open(file_name, mode='r', encoding='utf-8') as utf8_file:
        width_dictionary = {}
        # Skip everything in front of the WIDTH section.
        for line in utf8_file:
            if line.startswith('WIDTH'):
                break
        # Continues with the line after “WIDTH”, both loops share the
        # same file iterator.
        for line in utf8_file:
            if line.startswith('END WIDTH'):
                return width_dictionary
            match = entry_pattern.match(line)
            if not match:
                continue
            codepoint1 = match.group('codepoint1')
            # A single code point is treated as a range of length one.
            codepoint2 = match.group('codepoint2') or codepoint1
            width = int(match.group('width'))
            for i in range(int(codepoint1, 16),
                           int(codepoint2, 16) + 1):
                width_dictionary[i] = width
    # Only reached when the file ended before “END WIDTH” was seen.
    sys.stderr.write('No “WIDTH” or no “END WIDTH” found in %s\n'
                     %file_name)
    sys.exit(1)

def check_width(original_file_name, new_file_name):
    '''Print a report comparing the WIDTH sections of two files

    Prints how many code points were removed from, changed in and
    added to the WIDTH section of the new file compared to the
    original file.  The affected code points themselves are listed as
    well if ARGS.show_missing_characters, ARGS.show_changed_characters
    or ARGS.show_added_characters, respectively, is set.
    '''
    def describe(code_point):
        # Builds the “eaw=… category=… bidi=… name=…” tail which all
        # three kinds of detail lines share.  Every value which is not
        # available in unicode_utils is shown as the text “None”.
        if code_point in unicode_utils.EAST_ASIAN_WIDTHS:
            eaw = unicode_utils.EAST_ASIAN_WIDTHS[code_point]
        else:
            eaw = 'None'
        if code_point in unicode_utils.UNICODE_ATTRIBUTES:
            attributes = unicode_utils.UNICODE_ATTRIBUTES[code_point]
            category = attributes['category']
            bidi = attributes['bidi']
            name = attributes['name']
        else:
            category = bidi = name = 'None'
        return ''.join([
            'eaw={:s} '.format(eaw),
            'category={:2s} '.format(category),
            'bidi={:3s} '.format(bidi),
            'name={:s}'.format(name),
        ])

    separator = '------------------------------------------------------------'

    print('************************************************************')
    print('Report on WIDTH:')
    old_widths = create_width_dictionary(original_file_name)
    new_widths = create_width_dictionary(new_file_name)

    # Code points which are only in the original file.
    print(separator)
    removed = set(old_widths) - set(new_widths)
    print('Total removed characters in newly generated WIDTH: %d'
          %len(removed))
    print('(Characters not in WIDTH get width 1 by default, '
          'i.e. these have width 1 now.)')
    if ARGS.show_missing_characters:
        for code_point in sorted(removed):
            print('removed: {:s} '.format(
                      unicode_utils.ucs_symbol(code_point))
                  + '{:d} : '.format(old_widths[code_point])
                  + describe(code_point))

    # Code points which are in both files but with a different width.
    print(separator)
    changed = {
        code_point: (old_widths[code_point], new_widths[code_point])
        for code_point in set(old_widths) & set(new_widths)
        if old_widths[code_point] != new_widths[code_point]
    }
    print('Total changed characters in newly generated WIDTH: %d'
          %len(changed))
    if ARGS.show_changed_characters:
        for code_point in sorted(changed):
            old_width, new_width = changed[code_point]
            print('changed width: {:s} '.format(
                      unicode_utils.ucs_symbol(code_point))
                  + '{:d}->{:d} : '.format(old_width, new_width)
                  + describe(code_point))

    # Code points which are only in the new file.
    print(separator)
    added = set(new_widths) - set(old_widths)
    print('Total added characters in newly generated WIDTH: %d'
          %len(added))
    print('(Characters not in WIDTH get width 1 by default, '
          'i.e. these had width 1 before.)')
    if ARGS.show_added_characters:
        for code_point in sorted(added):
            print('added: {:s} '.format(
                      unicode_utils.ucs_symbol(code_point))
                  + '{:d} : '.format(new_widths[code_point])
                  + describe(code_point))

if __name__ == "__main__":
    # PARSER and ARGS are module-level names on purpose: check_charmap()
    # and check_width() read the show_* options from the global ARGS.
    PARSER = argparse.ArgumentParser(
        description='''
        Compare the CHARMAP and WIDTH sections of two UTF-8 charmap
        files and report removed, changed and added characters.
        ''')
    # The two files to compare are mandatory and need a value: without
    # nargs='?' argparse rejects a bare “-o” or “-n” itself instead of
    # passing None on to open().
    PARSER.add_argument(
        '-o', '--old_utf8_file',
        required=True,
        type=str,
        help='The old UTF-8 file.')
    PARSER.add_argument(
        '-n', '--new_utf8_file',
        required=True,
        type=str,
        help='The new UTF-8 file.')
    # The Unicode data files are optional, they are only used to show
    # more information about the characters listed in the reports.
    PARSER.add_argument(
        '-u', '--unicode_data_file',
        nargs='?',
        type=str,
        help='The UnicodeData.txt file to read.')
    PARSER.add_argument(
        '-e', '--east_asian_width_file',
        nargs='?',
        type=str,
        help='The EastAsianWidth.txt file to read.')
    PARSER.add_argument(
        '-a', '--show_added_characters',
        action='store_true',
        help='Show characters which were added in detail.')
    PARSER.add_argument(
        '-m', '--show_missing_characters',
        action='store_true',
        help='Show characters which were removed in detail.')
    # This option is used by both the CHARMAP and the WIDTH report.
    PARSER.add_argument(
        '-c', '--show_changed_characters',
        action='store_true',
        help='Show characters which were changed in detail.')
    ARGS = PARSER.parse_args()

    if ARGS.unicode_data_file:
        unicode_utils.fill_attributes(ARGS.unicode_data_file)
    if ARGS.east_asian_width_file:
        unicode_utils.fill_east_asian_widths(ARGS.east_asian_width_file)
    check_charmap(ARGS.old_utf8_file, ARGS.new_utf8_file)
    check_width(ARGS.old_utf8_file, ARGS.new_utf8_file)
Commit	Line	Data
4a4839c9 AO	1	#!/usr/bin/python3
4a4839c9 AO	2	# -*- coding: utf-8 -*-
04277e02	3	# Copyright (C) 2014-2019 Free Software Foundation, Inc.
4a4839c9 AO	4	# This file is part of the GNU C Library.
	5	#
	6	# The GNU C Library is free software; you can redistribute it and/or
	7	# modify it under the terms of the GNU Lesser General Public
	8	# License as published by the Free Software Foundation; either
	9	# version 2.1 of the License, or (at your option) any later version.
	10	#
	11	# The GNU C Library is distributed in the hope that it will be useful,
	12	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	# Lesser General Public License for more details.
	15	#
	16	# You should have received a copy of the GNU Lesser General Public
	17	# License along with the GNU C Library; if not, see
	18	# <http://www.gnu.org/licenses/>.
	19
	20	'''
	21	This script is useful for checking backward compatibility of newly
	22	generated UTF-8 file from utf8_gen.py script
	23
	24	To see how this script is used, call it with the “-h” option:
	25
	26	$ ./utf8_compatibility.py -h
	27	… prints usage message …
	28	'''
	29
	30	import sys
	31	import re
	32	import argparse
dd8e8e54	33	import unicode_utils
4a4839c9 AO	34
	35	def create_charmap_dictionary(file_name):
	36	'''Create a dictionary for all code points found in the CHARMAP
	37	section of a file
	38	'''
	39	with open(file_name, mode='r') as utf8_file:
	40	charmap_dictionary = {}
	41	for line in utf8_file:
	42	if line.startswith('CHARMAP'):
	43	break
	44	for line in utf8_file:
	45	if line.startswith('END CHARMAP'):
	46	return charmap_dictionary
	47	if line.startswith('%'):
	48	continue
	49	match = re.match(
	50	r'^<U(?P<codepoint1>[0-9A-F]{4,8})>'
	51	+r'(:?\.\.<U(?P<codepoint2>[0-9-A-F]{4,8})>)?'
	52	+r'\s+(?P<hexutf8>(/x[0-9a-f]{2}){1,4})',
	53	line)
	54	if not match:
	55	continue
	56	codepoint1 = match.group('codepoint1')
	57	codepoint2 = match.group('codepoint2')
	58	if not codepoint2:
	59	codepoint2 = codepoint1
	60	for i in range(int(codepoint1, 16),
	61	int(codepoint2, 16) + 1):
	62	charmap_dictionary[i] = match.group('hexutf8')
	63	sys.stderr.write('No “CHARMAP” or no “END CHARMAP” found in %s\n'
	64	%file_name)
	65	exit(1)
	66
	67	def check_charmap(original_file_name, new_file_name):
	68	'''Report differences in the CHARMAP section between the old and the
	69	new file
	70	'''
	71	print('************************************************************')
	72	print('Report on CHARMAP:')
	73	ocharmap = create_charmap_dictionary(original_file_name)
	74	ncharmap = create_charmap_dictionary(new_file_name)
	75	print('------------------------------------------------------------')
	76	print('Total removed characters in newly generated CHARMAP: %d'
	77	%len(set(ocharmap)-set(ncharmap)))
	78	if ARGS.show_missing_characters:
	79	for key in sorted(set(ocharmap)-set(ncharmap)):
	80	print('removed: {:s} {:s} {:s}'.format(
dd8e8e54	81	unicode_utils.ucs_symbol(key),
4a4839c9	82	ocharmap[key],
dd8e8e54 CD	83	unicode_utils.UNICODE_ATTRIBUTES[key]['name'] \
dd8e8e54 CD	84	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None'))
4a4839c9 AO	85	print('------------------------------------------------------------')
	86	changed_charmap = {}
	87	for key in set(ocharmap).intersection(set(ncharmap)):
	88	if ocharmap[key] != ncharmap[key]:
	89	changed_charmap[key] = (ocharmap[key], ncharmap[key])
	90	print('Total changed characters in newly generated CHARMAP: %d'
	91	%len(changed_charmap))
	92	if ARGS.show_changed_characters:
	93	for key in sorted(changed_charmap):
	94	print('changed: {:s} {:s}->{:s} {:s}'.format(
dd8e8e54	95	unicode_utils.ucs_symbol(key),
4a4839c9 AO	96	changed_charmap[key][0],
4a4839c9 AO	97	changed_charmap[key][1],
dd8e8e54 CD	98	unicode_utils.UNICODE_ATTRIBUTES[key]['name'] \
dd8e8e54 CD	99	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None'))
4a4839c9 AO	100	print('------------------------------------------------------------')
	101	print('Total added characters in newly generated CHARMAP: %d'
	102	%len(set(ncharmap)-set(ocharmap)))
	103	if ARGS.show_added_characters:
	104	for key in sorted(set(ncharmap)-set(ocharmap)):
	105	print('added: {:s} {:s} {:s}'.format(
dd8e8e54	106	unicode_utils.ucs_symbol(key),
4a4839c9	107	ncharmap[key],
dd8e8e54 CD	108	unicode_utils.UNICODE_ATTRIBUTES[key]['name'] \
dd8e8e54 CD	109	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None'))
4a4839c9 AO	110
	111	def create_width_dictionary(file_name):
	112	'''Create a dictionary for all code points found in the WIDTH
	113	section of a file
	114	'''
	115	with open(file_name, mode='r') as utf8_file:
	116	width_dictionary = {}
	117	for line in utf8_file:
	118	if line.startswith('WIDTH'):
	119	break
	120	for line in utf8_file:
	121	if line.startswith('END WIDTH'):
	122	return width_dictionary
	123	match = re.match(
	124	r'^<U(?P<codepoint1>[0-9A-F]{4,8})>'
	125	+r'(:?\.\.\.<U(?P<codepoint2>[0-9-A-F]{4,8})>)?'
	126	+r'\s+(?P<width>[02])',
	127	line)
	128	if not match:
	129	continue
	130	codepoint1 = match.group('codepoint1')
	131	codepoint2 = match.group('codepoint2')
	132	if not codepoint2:
	133	codepoint2 = codepoint1
	134	for i in range(int(codepoint1, 16),
	135	int(codepoint2, 16) + 1):
	136	width_dictionary[i] = int(match.group('width'))
	137	sys.stderr.write('No “WIDTH” or no “END WIDTH” found in %s\n' %file)
	138
	139	def check_width(original_file_name, new_file_name):
	140	'''Report differences in the WIDTH section between the old and the new
	141	file
	142	'''
	143	print('************************************************************')
	144	print('Report on WIDTH:')
	145	owidth = create_width_dictionary(original_file_name)
	146	nwidth = create_width_dictionary(new_file_name)
	147	print('------------------------------------------------------------')
	148	print('Total removed characters in newly generated WIDTH: %d'
	149	%len(set(owidth)-set(nwidth)))
	150	print('(Characters not in WIDTH get width 1 by default, '
	151	+ 'i.e. these have width 1 now.)')
	152	if ARGS.show_missing_characters:
	153	for key in sorted(set(owidth)-set(nwidth)):
dd8e8e54	154	print('removed: {:s} '.format(unicode_utils.ucs_symbol(key))
4a4839c9 AO	155	+ '{:d} : '.format(owidth[key])
4a4839c9 AO	156	+ 'eaw={:s} '.format(
dd8e8e54 CD	157	unicode_utils.EAST_ASIAN_WIDTHS[key]
dd8e8e54 CD	158	if key in unicode_utils.EAST_ASIAN_WIDTHS else 'None')
4a4839c9	159	+ 'category={:2s} '.format(
dd8e8e54 CD	160	unicode_utils.UNICODE_ATTRIBUTES[key]['category']
dd8e8e54 CD	161	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None')
4a4839c9	162	+ 'bidi={:3s} '.format(
dd8e8e54 CD	163	unicode_utils.UNICODE_ATTRIBUTES[key]['bidi']
dd8e8e54 CD	164	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None')
4a4839c9	165	+ 'name={:s}'.format(
dd8e8e54 CD	166	unicode_utils.UNICODE_ATTRIBUTES[key]['name']
dd8e8e54 CD	167	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None'))
4a4839c9 AO	168	print('------------------------------------------------------------')
	169	changed_width = {}
	170	for key in set(owidth).intersection(set(nwidth)):
	171	if owidth[key] != nwidth[key]:
	172	changed_width[key] = (owidth[key], nwidth[key])
	173	print('Total changed characters in newly generated WIDTH: %d'
	174	%len(changed_width))
	175	if ARGS.show_changed_characters:
	176	for key in sorted(changed_width):
dd8e8e54	177	print('changed width: {:s} '.format(unicode_utils.ucs_symbol(key))
4a4839c9 AO	178	+ '{:d}->{:d} : '.format(changed_width[key][0],
	179	changed_width[key][1])
	180	+ 'eaw={:s} '.format(
dd8e8e54 CD	181	unicode_utils.EAST_ASIAN_WIDTHS[key]
dd8e8e54 CD	182	if key in unicode_utils.EAST_ASIAN_WIDTHS else 'None')
4a4839c9	183	+ 'category={:2s} '.format(
dd8e8e54 CD	184	unicode_utils.UNICODE_ATTRIBUTES[key]['category']
dd8e8e54 CD	185	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None')
4a4839c9	186	+ 'bidi={:3s} '.format(
dd8e8e54 CD	187	unicode_utils.UNICODE_ATTRIBUTES[key]['bidi']
dd8e8e54 CD	188	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None')
4a4839c9	189	+ 'name={:s}'.format(
dd8e8e54 CD	190	unicode_utils.UNICODE_ATTRIBUTES[key]['name']
dd8e8e54 CD	191	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None'))
4a4839c9 AO	192	print('------------------------------------------------------------')
	193	print('Total added characters in newly generated WIDTH: %d'
	194	%len(set(nwidth)-set(owidth)))
	195	print('(Characters not in WIDTH get width 1 by default, '
	196	+ 'i.e. these had width 1 before.)')
	197	if ARGS.show_added_characters:
	198	for key in sorted(set(nwidth)-set(owidth)):
dd8e8e54	199	print('added: {:s} '.format(unicode_utils.ucs_symbol(key))
4a4839c9 AO	200	+ '{:d} : '.format(nwidth[key])
4a4839c9 AO	201	+ 'eaw={:s} '.format(
dd8e8e54 CD	202	unicode_utils.EAST_ASIAN_WIDTHS[key]
dd8e8e54 CD	203	if key in unicode_utils.EAST_ASIAN_WIDTHS else 'None')
4a4839c9	204	+ 'category={:2s} '.format(
dd8e8e54 CD	205	unicode_utils.UNICODE_ATTRIBUTES[key]['category']
dd8e8e54 CD	206	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None')
4a4839c9	207	+ 'bidi={:3s} '.format(
dd8e8e54 CD	208	unicode_utils.UNICODE_ATTRIBUTES[key]['bidi']
dd8e8e54 CD	209	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None')
4a4839c9	210	+ 'name={:s}'.format(
dd8e8e54 CD	211	unicode_utils.UNICODE_ATTRIBUTES[key]['name']
dd8e8e54 CD	212	if key in unicode_utils.UNICODE_ATTRIBUTES else 'None'))
4a4839c9 AO	213
	214	if __name__ == "__main__":
	215	PARSER = argparse.ArgumentParser(
	216	description='''
	217	Compare the contents of LC_CTYPE in two files and check for errors.
	218	''')
	219	PARSER.add_argument(
	220	'-o', '--old_utf8_file',
	221	nargs='?',
	222	required=True,
	223	type=str,
	224	help='The old UTF-8 file.')
	225	PARSER.add_argument(
	226	'-n', '--new_utf8_file',
	227	nargs='?',
	228	required=True,
	229	type=str,
	230	help='The new UTF-8 file.')
	231	PARSER.add_argument(
	232	'-u', '--unicode_data_file',
	233	nargs='?',
	234	type=str,
	235	help='The UnicodeData.txt file to read.')
	236	PARSER.add_argument(
	237	'-e', '--east_asian_width_file',
	238	nargs='?',
	239	type=str,
	240	help='The EastAsianWidth.txt file to read.')
	241	PARSER.add_argument(
	242	'-a', '--show_added_characters',
	243	action='store_true',
	244	help='Show characters which were added in detail.')
	245	PARSER.add_argument(
	246	'-m', '--show_missing_characters',
	247	action='store_true',
	248	help='Show characters which were removed in detail.')
	249	PARSER.add_argument(
	250	'-c', '--show_changed_characters',
	251	action='store_true',
	252	help='Show characters whose width was changed in detail.')
	253	ARGS = PARSER.parse_args()
	254
	255	if ARGS.unicode_data_file:
dd8e8e54	256	unicode_utils.fill_attributes(ARGS.unicode_data_file)
4a4839c9	257	if ARGS.east_asian_width_file:
dd8e8e54	258	unicode_utils.fill_east_asian_widths(ARGS.east_asian_width_file)
4a4839c9 AO	259	check_charmap(ARGS.old_utf8_file, ARGS.new_utf8_file)
4a4839c9 AO	260	check_width(ARGS.old_utf8_file, ARGS.new_utf8_file)