[thirdparty/binutils-gdb.git] / gdb / ada-unicode.py

#!/usr/bin/env python3

# Generate Unicode case-folding table for Ada.

# Copyright (C) 2022-2024 Free Software Foundation, Inc.

# This file is part of GDB.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# This generates the ada-casefold.h header.
# Usage:
#   python ada-unicode.py

import gdbcopyright

# The start of the current range of case-conversions we are
# processing.  If RANGE_START is None, then we're outside of a range.
range_start = None
# End of the current range.
range_end = None
# The delta between RANGE_START and the upper-case variant of that
# character.
upper_delta = None
# The delta between RANGE_START and the lower-case variant of that
# character.
lower_delta = None

# All the ranges found and completed so far.
# Each entry is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA).
all_ranges = []


def finish_range():
    """Close the run currently being accumulated, if there is one.

    When a run is open (the module-level ``range_start`` is not None),
    record it in ``all_ranges`` as a
    ``(START, END, UPPER_DELTA, LOWER_DELTA)`` tuple and reset the four
    run-tracking globals to None.  When no run is open this does nothing.
    """
    global range_start, range_end, upper_delta, lower_delta

    # Nothing to record when we are outside of a run.
    if range_start is None:
        return

    completed = (range_start, range_end, upper_delta, lower_delta)
    all_ranges.append(completed)

    # Back to the "outside of a range" state.
    range_start = range_end = upper_delta = lower_delta = None


def process_codepoint(val):
    """Fold the code point VAL into the run currently being built.

    VAL is an integer code point.  If the character has no case
    conversion, or if either conversion is not a single character, the
    open run (if any) is closed and VAL is skipped.  Otherwise VAL
    extends the open run when its upper- and lower-case deltas match
    that run's deltas; if they differ, the open run is closed and a new
    one is started at VAL.
    """
    global range_start, range_end, upper_delta, lower_delta

    char = chr(val)
    lowered = char.lower()
    uppered = char.upper()

    # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszett) traditionally
    # upper-cases to the two-character string "SS" (the capital form
    # is a relatively recent addition -- 2017).  A single delta cannot
    # represent a multi-character result, so such characters are
    # skipped.  Because a run only describes consecutive characters
    # sharing the same deltas, a skipped character also ends the run.
    caseless = lowered == char == uppered
    if caseless or len(lowered) != 1 or len(uppered) != 1:
        finish_range()
        return

    deltas = (ord(uppered) - val, ord(lowered) - val)

    # A change in either delta ends the run in progress.
    if range_start is not None and deltas != (upper_delta, lower_delta):
        finish_range()

    # Open a new run here if none is in progress (possibly because the
    # previous one was closed just above).
    if range_start is None:
        range_start = val
        upper_delta, lower_delta = deltas

    range_end = val


# Scan every Unicode code point, U+0000 through U+10FFFF inclusive.
# range() excludes its stop value, so the stop must be 0x110000 for the
# final code point to be examined.
for c in range(0, 0x110000):
    process_codepoint(c)
# A run that reaches the very last code point is still open when the
# loop ends; close it so that it is recorded in ALL_RANGES rather than
# silently dropped.  This is a no-op when no run is open.
finish_range()

# Write the generated header: the copyright banner, a blank line, then
# one "{START, END, UPPER_DELTA, LOWER_DELTA}," initializer per run.
with open("ada-casefold.h", "w") as f:
    print(
        gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
        file=f,
    )
    print("", file=f)
    for r in all_ranges:
        print(f"   {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f)
Commit	Line	Data
315e4ebb TT	1	#!/usr/bin/env python3
	2
	3	# Generate Unicode case-folding table for Ada.
	4
1d506c26	5	# Copyright (C) 2022-2024 Free Software Foundation, Inc.
315e4ebb TT	6
	7	# This file is part of GDB.
	8
	9	# This program is free software; you can redistribute it and/or modify
	10	# it under the terms of the GNU General Public License as published by
	11	# the Free Software Foundation; either version 3 of the License, or
	12	# (at your option) any later version.
	13
	14	# This program is distributed in the hope that it will be useful,
	15	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	16	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	17	# GNU General Public License for more details.
	18
	19	# You should have received a copy of the GNU General Public License
	20	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	21
	22	# This generates the ada-casefold.h header.
	23	# Usage:
	24	# python ada-unicode.py
	25
	26	import gdbcopyright
	27
	28	# The start of the current range of case-conversions we are
	29	# processing. If RANGE_START is None, then we're outside of a range.
	30	range_start = None
	31	# End of the current range.
	32	range_end = None
	33	# The delta between RANGE_START and the upper-case variant of that
	34	# character.
	35	upper_delta = None
	36	# The delta between RANGE_START and the lower-case variant of that
	37	# character.
	38	lower_delta = None
	39
	40	# All the ranges found and completed so far.
	41	# Each entry is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA).
	42	all_ranges = []
	43
	44
	45	def finish_range():
	46	global range_start
	47	global range_end
	48	global upper_delta
	49	global lower_delta
	50	if range_start is not None:
	51	all_ranges.append((range_start, range_end, upper_delta, lower_delta))
	52	range_start = None
	53	range_end = None
	54	upper_delta = None
	55	lower_delta = None
	56
	57
	58	def process_codepoint(val):
	59	global range_start
	60	global range_end
	61	global upper_delta
	62	global lower_delta
	63	c = chr(val)
	64	low = c.lower()
	65	up = c.upper()
	66	# U+00DF ("LATIN SMALL LETTER SHARP S", aka eszsett) traditionally
	67	# upper-cases to the two-character string "SS" (the capital form
	68	# is a relatively recent addition -- 2017). Our simple scheme
	69	# can't handle this, so we skip it. Also, because our approach
70	# just represents runs of characters with identical folding
71	# deltas, this change must terminate the current run.
72	if (c == low and c == up) or len(low) != 1 or len(up) != 1:
73	finish_range()
74	return
75	updelta = ord(up) - val
76	lowdelta = ord(low) - val
77	if range_start is not None and (updelta != upper_delta or lowdelta != lower_delta):
78	finish_range()
79	if range_start is None:
80	range_start = val
81	upper_delta = updelta
82	lower_delta = lowdelta
83	range_end = val
84
85
86	for c in range(0, 0x10FFFF):
87	process_codepoint(c)
88
89	with open("ada-casefold.h", "w") as f:
90	print(
91	gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
92	file=f,
93	)
b871f5ee	94	print("", file=f)
315e4ebb TT	95	for r in all_ranges:
315e4ebb TT	96	print(f" {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f)