[thirdparty/gcc.git] / gcc / ada / widechar.adb

------------------------------------------------------------------------------
--                                                                          --
--                         GNAT COMPILER COMPONENTS                         --
--                                                                          --
--                             W I D E C H A R                              --
--                                                                          --
--                                 B o d y                                  --
--                                                                          --
--          Copyright (C) 1992-2020, Free Software Foundation, Inc.         --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
--                                                                          --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception,   --
-- version 3.1, as published by the Free Software Foundation.               --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
--                                                                          --
------------------------------------------------------------------------------

--  Note: this package uses the generic subprograms in System.WCh_Cnv, which
--  completely encapsulate the set of wide character encoding methods, so no
--  modifications are required when adding new encoding methods.

with Opt; use Opt;

with System.WCh_Cnv; use System.WCh_Cnv;
with System.WCh_Con; use System.WCh_Con;

package body Widechar is

   ---------------------------
   -- Is_Start_Of_Wide_Char --
   ---------------------------

   function Is_Start_Of_Wide_Char
     (S : Source_Buffer_Ptr;
      P : Source_Ptr) return Boolean
   is
   begin
      case Wide_Character_Encoding_Method is

         --  For Hex mode, just test for an ESC character. The ESC character
         --  cannot appear in any other context in a legal Ada program.

         when WCEM_Hex =>
            return S (P) = ASCII.ESC;

         --  For brackets, just test ["x where x is a hex character. This is
         --  sufficient test, since this sequence cannot otherwise appear in a
         --  legal Ada program.

         when WCEM_Brackets =>
            return P <= S'Last - 2
              and then S (P) = '['
              and then S (P + 1) = '"'
              and then (S (P + 2) in '0' .. '9'
                            or else
                           S (P + 2) in 'a' .. 'f'
                            or else
                        S (P + 2) in 'A' .. 'F');

         --  All other encoding methods use the upper bit set in the first
         --  character to uniquely represent a wide character.

         when WCEM_EUC
            | WCEM_Shift_JIS
            | WCEM_Upper
            | WCEM_UTF8
         =>
            return S (P) >= Character'Val (16#80#);
      end case;
   end Is_Start_Of_Wide_Char;

   -----------------
   -- Length_Wide --
   -----------------

   function Length_Wide return Nat is
   begin
      return WC_Longest_Sequence;
   end Length_Wide;

   ---------------
   -- Scan_Wide --
   ---------------

   procedure Scan_Wide
     (S   : Source_Buffer_Ptr;
      P   : in out Source_Ptr;
      C   : out Char_Code;
      Err : out Boolean)
   is
      P_Init : constant Source_Ptr := P;
      Chr    : Character;

      function In_Char return Character;
      --  Function to obtain characters of wide character escape sequence

      -------------
      -- In_Char --
      -------------

      function In_Char return Character is
      begin
         P := P + 1;
         return S (P - 1);
      end In_Char;

      function WC_In is new Char_Sequence_To_UTF_32 (In_Char);

   --  Start of processing for Scan_Wide

   begin
      Chr := In_Char;

      --  Scan out the wide character. If the first character is a bracket,
      --  we allow brackets encoding regardless of the standard encoding
      --  method being used, but otherwise we use this standard method.

      if Chr = '[' then
         C := Char_Code (WC_In (Chr, WCEM_Brackets));
      else
         C := Char_Code (WC_In (Chr, Wide_Character_Encoding_Method));
      end if;

      Err := False;
      Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);

   exception
      when Constraint_Error =>
         C := Char_Code (0);
         P := P - 1;
         Err := True;
   end Scan_Wide;

   --------------
   -- Set_Wide --
   --------------

   procedure Set_Wide
     (C : Char_Code;
      S : in out String;
      P : in out Natural)
   is
      procedure Out_Char (C : Character);
      --  Procedure to store one character of wide character sequence

      --------------
      -- Out_Char --
      --------------

      procedure Out_Char (C : Character) is
      begin
         P := P + 1;
         S (P) := C;
      end Out_Char;

      procedure WC_Out is new UTF_32_To_Char_Sequence (Out_Char);

   --  Start of processing for Set_Wide

   begin
      WC_Out (UTF_32_Code (C), Wide_Character_Encoding_Method);
   end Set_Wide;

   ---------------
   -- Skip_Wide --
   ---------------

   procedure Skip_Wide (S : String; P : in out Natural) is
      P_Init : constant Natural := P;

      function Skip_Char return Character;
      --  Function to skip one character of wide character escape sequence

      ---------------
      -- Skip_Char --
      ---------------

      function Skip_Char return Character is
      begin
         P := P + 1;
         return S (P - 1);
      end Skip_Char;

      function WC_Skip is new Char_Sequence_To_UTF_32 (Skip_Char);

      Discard : UTF_32_Code;
      pragma Warnings (Off, Discard);

   --  Start of processing for Skip_Wide

   begin
      Discard := WC_Skip (Skip_Char, Wide_Character_Encoding_Method);
      Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);
   end Skip_Wide;

   ---------------
   -- Skip_Wide --
   ---------------

   procedure Skip_Wide (S : Source_Buffer_Ptr; P : in out Source_Ptr) is
      P_Init : constant Source_Ptr := P;

      function Skip_Char return Character;
      --  Function to skip one character of wide character escape sequence

      ---------------
      -- Skip_Char --
      ---------------

      function Skip_Char return Character is
      begin
         P := P + 1;
         return S (P - 1);
      end Skip_Char;

      function WC_Skip is new Char_Sequence_To_UTF_32 (Skip_Char);

      Discard : UTF_32_Code;
      pragma Warnings (Off, Discard);

   --  Start of processing for Skip_Wide

   begin
      Discard := WC_Skip (Skip_Char, Wide_Character_Encoding_Method);
      Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);
   end Skip_Wide;

end Widechar;
Commit	Line	Data
415dddc8 RK	1	------------------------------------------------------------------------------
	2	-- --
	3	-- GNAT COMPILER COMPONENTS --
	4	-- --
	5	-- W I D E C H A R --
	6	-- --
	7	-- B o d y --
	8	-- --
4b490c1e	9	-- Copyright (C) 1992-2020, Free Software Foundation, Inc. --
415dddc8 RK	10	-- --
	11	-- GNAT is free software; you can redistribute it and/or modify it under --
	12	-- terms of the GNU General Public License as published by the Free Soft- --
748086b7	13	-- ware Foundation; either version 3, or (at your option) any later ver- --
415dddc8 RK	14	-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
415dddc8 RK	15	-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
748086b7 JJ	16	-- or FITNESS FOR A PARTICULAR PURPOSE. --
	17	-- --
	18	-- As a special exception under Section 7 of GPL version 3, you are granted --
	19	-- additional permissions described in the GCC Runtime Library Exception, --
	20	-- version 3.1, as published by the Free Software Foundation. --
	21	-- --
	22	-- You should have received a copy of the GNU General Public License and --
	23	-- a copy of the GCC Runtime Library Exception along with this program; --
	24	-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
	25	-- <http://www.gnu.org/licenses/>. --
415dddc8 RK	26	-- --
415dddc8 RK	27	-- GNAT was originally developed by the GNAT team at New York University. --
71ff80dc	28	-- Extensive contributions were provided by Ada Core Technologies Inc. --
415dddc8 RK	29	-- --
	30	------------------------------------------------------------------------------
	31
c7a7f405	32	-- Note: this package uses the generic subprograms in System.WCh_Cnv, which
415dddc8 RK	33	-- completely encapsulate the set of wide character encoding methods, so no
	34	-- modifications are required when adding new encoding methods.
	35
	36	with Opt; use Opt;
	37
	38	with System.WCh_Cnv; use System.WCh_Cnv;
	39	with System.WCh_Con; use System.WCh_Con;
	40
	41	package body Widechar is
	42
	43	---------------------------
	44	-- Is_Start_Of_Wide_Char --
	45	---------------------------
	46
	47	function Is_Start_Of_Wide_Char
82c80734 RD	48	(S : Source_Buffer_Ptr;
82c80734 RD	49	P : Source_Ptr) return Boolean
415dddc8 RK	50	is
	51	begin
	52	case Wide_Character_Encoding_Method is
86e16c56 RD	53
	54	-- For Hex mode, just test for an ESC character. The ESC character
	55	-- cannot appear in any other context in a legal Ada program.
	56
415dddc8 RK	57	when WCEM_Hex =>
	58	return S (P) = ASCII.ESC;
	59
86e16c56 RD	60	-- For brackets, just test ["x where x is a hex character. This is
	61	-- sufficient test, since this sequence cannot otherwise appear in a
	62	-- legal Ada program.
415dddc8 RK	63
	64	when WCEM_Brackets =>
	65	return P <= S'Last - 2
	66	and then S (P) = '['
	67	and then S (P + 1) = '"'
86e16c56 RD	68	and then (S (P + 2) in '0' .. '9'
	69	or else
	70	S (P + 2) in 'a' .. 'f'
	71	or else
	72	S (P + 2) in 'A' .. 'F');
	73
	74	-- All other encoding methods use the upper bit set in the first
	75	-- character to uniquely represent a wide character.
	76
d8f43ee6 HK	77	when WCEM_EUC
	78	\| WCEM_Shift_JIS
	79	\| WCEM_Upper
	80	\| WCEM_UTF8
	81	=>
86e16c56	82	return S (P) >= Character'Val (16#80#);
415dddc8 RK	83	end case;
	84	end Is_Start_Of_Wide_Char;
	85
	86	-----------------
	87	-- Length_Wide --
	88	-----------------
	89
	90	function Length_Wide return Nat is
	91	begin
	92	return WC_Longest_Sequence;
	93	end Length_Wide;
	94
	95	---------------
	96	-- Scan_Wide --
	97	---------------
	98
	99	procedure Scan_Wide
	100	(S : Source_Buffer_Ptr;
	101	P : in out Source_Ptr;
	102	C : out Char_Code;
	103	Err : out Boolean)
	104	is
d52f1094	105	P_Init : constant Source_Ptr := P;
86e16c56	106	Chr : Character;
d52f1094	107
415dddc8 RK	108	function In_Char return Character;
	109	-- Function to obtain characters of wide character escape sequence
	110
82c80734 RD	111	-------------
	112	-- In_Char --
	113	-------------
	114
415dddc8 RK	115	function In_Char return Character is
	116	begin
	117	P := P + 1;
	118	return S (P - 1);
	119	end In_Char;
	120
82c80734 RD	121	function WC_In is new Char_Sequence_To_UTF_32 (In_Char);
82c80734 RD	122
efde9617	123	-- Start of processing for Scan_Wide
415dddc8 RK	124
415dddc8 RK	125	begin
86e16c56 RD	126	Chr := In_Char;
86e16c56 RD	127
3354f96d	128	-- Scan out the wide character. If the first character is a bracket,
86e16c56 RD	129	-- we allow brackets encoding regardless of the standard encoding
	130	-- method being used, but otherwise we use this standard method.
	131
	132	if Chr = '[' then
	133	C := Char_Code (WC_In (Chr, WCEM_Brackets));
	134	else
	135	C := Char_Code (WC_In (Chr, Wide_Character_Encoding_Method));
	136	end if;
	137
415dddc8	138	Err := False;
d52f1094	139	Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);
415dddc8 RK	140
	141	exception
	142	when Constraint_Error =>
	143	C := Char_Code (0);
	144	P := P - 1;
	145	Err := True;
	146	end Scan_Wide;
	147
	148	--------------
	149	-- Set_Wide --
	150	--------------
	151
	152	procedure Set_Wide
	153	(C : Char_Code;
	154	S : in out String;
	155	P : in out Natural)
	156	is
	157	procedure Out_Char (C : Character);
	158	-- Procedure to store one character of wide character sequence
	159
82c80734 RD	160	--------------
	161	-- Out_Char --
	162	--------------
	163
415dddc8 RK	164	procedure Out_Char (C : Character) is
	165	begin
	166	P := P + 1;
	167	S (P) := C;
	168	end Out_Char;
	169
82c80734 RD	170	procedure WC_Out is new UTF_32_To_Char_Sequence (Out_Char);
	171
	172	-- Start of processing for Set_Wide
415dddc8 RK	173
415dddc8 RK	174	begin
82c80734	175	WC_Out (UTF_32_Code (C), Wide_Character_Encoding_Method);
415dddc8 RK	176	end Set_Wide;
	177
	178	---------------
	179	-- Skip_Wide --
	180	---------------
	181
	182	procedure Skip_Wide (S : String; P : in out Natural) is
d52f1094 RD	183	P_Init : constant Natural := P;
d52f1094 RD	184
415dddc8 RK	185	function Skip_Char return Character;
	186	-- Function to skip one character of wide character escape sequence
	187
82c80734 RD	188	---------------
	189	-- Skip_Char --
	190	---------------
	191
415dddc8 RK	192	function Skip_Char return Character is
	193	begin
	194	P := P + 1;
	195	return S (P - 1);
	196	end Skip_Char;
	197
82c80734	198	function WC_Skip is new Char_Sequence_To_UTF_32 (Skip_Char);
415dddc8	199
82c80734	200	Discard : UTF_32_Code;
fbf5a39b	201	pragma Warnings (Off, Discard);
415dddc8	202
82c80734 RD	203	-- Start of processing for Skip_Wide
82c80734 RD	204
415dddc8 RK	205	begin
415dddc8 RK	206	Discard := WC_Skip (Skip_Char, Wide_Character_Encoding_Method);
d52f1094	207	Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);
415dddc8 RK	208	end Skip_Wide;
415dddc8 RK	209
82c80734 RD	210	---------------
	211	-- Skip_Wide --
	212	---------------
	213
	214	procedure Skip_Wide (S : Source_Buffer_Ptr; P : in out Source_Ptr) is
d52f1094 RD	215	P_Init : constant Source_Ptr := P;
d52f1094 RD	216
82c80734 RD	217	function Skip_Char return Character;
	218	-- Function to skip one character of wide character escape sequence
	219
	220	---------------
	221	-- Skip_Char --
	222	---------------
	223
	224	function Skip_Char return Character is
	225	begin
	226	P := P + 1;
	227	return S (P - 1);
	228	end Skip_Char;
	229
	230	function WC_Skip is new Char_Sequence_To_UTF_32 (Skip_Char);
	231
	232	Discard : UTF_32_Code;
	233	pragma Warnings (Off, Discard);
	234
	235	-- Start of processing for Skip_Wide
	236
	237	begin
	238	Discard := WC_Skip (Skip_Char, Wide_Character_Encoding_Method);
d52f1094	239	Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);
82c80734 RD	240	end Skip_Wide;
82c80734 RD	241
415dddc8	242	end Widechar;