]>
Commit | Line | Data |
---|---|---|
cacbc350 RK |
1 | ------------------------------------------------------------------------------ |
2 | -- -- | |
3084fecd | 3 | -- GNAT RUN-TIME COMPONENTS -- |
cacbc350 RK |
4 | -- -- |
5 | -- S Y S T E M . W C H _ C N V -- | |
6 | -- -- | |
7 | -- S p e c -- | |
8 | -- -- | |
748086b7 | 9 | -- Copyright (C) 1992-2009, Free Software Foundation, Inc. -- |
cacbc350 RK |
10 | -- -- |
11 | -- GNAT is free software; you can redistribute it and/or modify it under -- | |
12 | -- terms of the GNU General Public License as published by the Free Soft- -- | |
748086b7 | 13 | -- ware Foundation; either version 3, or (at your option) any later ver- -- |
cacbc350 RK |
14 | -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- |
15 | -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- | |
748086b7 JJ |
16 | -- or FITNESS FOR A PARTICULAR PURPOSE. -- |
17 | -- -- | |
18 | -- As a special exception under Section 7 of GPL version 3, you are granted -- | |
19 | -- additional permissions described in the GCC Runtime Library Exception, -- | |
20 | -- version 3.1, as published by the Free Software Foundation. -- | |
21 | -- -- | |
22 | -- You should have received a copy of the GNU General Public License and -- | |
23 | -- a copy of the GCC Runtime Library Exception along with this program; -- | |
24 | -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- | |
25 | -- <http://www.gnu.org/licenses/>. -- | |
cacbc350 RK |
26 | -- -- |
27 | -- GNAT was originally developed by the GNAT team at New York University. -- | |
71ff80dc | 28 | -- Extensive contributions were provided by Ada Core Technologies Inc. -- |
cacbc350 RK |
29 | -- -- |
30 | ------------------------------------------------------------------------------ | |
31 | ||
2d9ea47f | 32 | -- This package contains generic subprograms used for converting between |
c80d4855 RD |
33 | -- sequences of Character and Wide_Character. Wide_Wide_Character values |
34 | -- are also handled, but represented using integer range types defined in | |
35 | -- this package, so that this package can be used from applications that | |
36 | -- are restricted to Ada 95 compatibility (such as the compiler itself). | |
37 | ||
38 | -- All the algorithms for encoding and decoding are isolated in this package | |
39 | -- and in System.WCh_JIS and should not be duplicated elsewhere. The only | |
40 | -- exception to this is that GNAT.Decode_String and GNAT.Encode_String have | |
41 | -- their own circuits for UTF-8 conversions, for improved efficiency. | |
2d9ea47f | 42 | |
fbf5a39b AC |
43 | -- This unit may be used directly from an application program by providing |
44 | -- an appropriate WITH, and the interface can be expected to remain stable. | |
45 | ||
2d9ea47f RD |
46 | pragma Warnings (Off); |
47 | pragma Compiler_Unit; | |
48 | pragma Warnings (On); | |
49 | ||
cacbc350 RK |
50 | with System.WCh_Con; |
51 | ||
52 | package System.WCh_Cnv is | |
009186e0 | 53 | pragma Pure; |
82c80734 RD |
54 | |
55 | type UTF_32_Code is range 0 .. 16#7FFF_FFFF#; | |
56 | for UTF_32_Code'Size use 32; | |
57 | -- Range of allowed UTF-32 encoding values (0 .. 2**31 - 1, in 32 bits) | |
cacbc350 | 58 | 
c80d4855 RD |
59 | type UTF_32_String is array (Positive range <>) of UTF_32_Code; |
60 | ||
cacbc350 RK |
61 | generic |
62 | with function In_Char return Character; | |
63 | function Char_Sequence_To_Wide_Char | |
82c80734 RD |
64 | (C : Character; |
65 | EM : System.WCh_Con.WC_Encoding_Method) return Wide_Character; | |
cacbc350 RK |
66 | -- C is the first character of a sequence of one or more characters which |
67 | -- represent a wide character sequence. Calling the function In_Char for | |
68 | -- additional characters as required, Char_Sequence_To_Wide_Char returns | |
69 | -- the corresponding wide character value. Constraint_Error is raised if | |
70 | -- the sequence of characters encountered is not a valid wide character | |
71 | -- sequence for the given encoding method. | |
c80d4855 RD |
72 | -- |
73 | -- Note on the use of brackets encoding (WCEM_Brackets). The brackets | |
74 | -- encoding method is ambiguous in the context of this function, since | |
75 | -- there is no way to tell if ["1234"] is eight unencoded characters or | |
76 | -- one encoded character. In the context of Ada sources, any sequence | |
77 | -- starting [" must be the start of an encoding (since that sequence is | |
78 | -- not valid in Ada source otherwise). The routines in this package use | |
79 | -- the same approach. If the input string contains the sequence [" then | |
80 | -- this is assumed to be the start of a brackets encoding sequence, and | |
81 | -- if it does not match the syntax, an error is raised. | |
cacbc350 | 82 | |
82c80734 RD |
83 | generic |
84 | with function In_Char return Character; | |
85 | function Char_Sequence_To_UTF_32 | |
86 | (C : Character; | |
87 | EM : System.WCh_Con.WC_Encoding_Method) return UTF_32_Code; | |
88 | -- This is similar to Char_Sequence_To_Wide_Char, but the function returns | |
89 | -- a code from the full UTF_32 code set, which covers the full range of | |
90 | -- possible values in Wide_Wide_Character. The result can be converted to | |
91 | -- Wide_Wide_Character form using Wide_Wide_Character'Val. | |
92 | ||
cacbc350 RK |
93 | generic |
94 | with procedure Out_Char (C : Character); | |
95 | procedure Wide_Char_To_Char_Sequence | |
96 | (WC : Wide_Character; | |
97 | EM : System.WCh_Con.WC_Encoding_Method); | |
98 | -- Given a wide character WC, converts it into a sequence of one or more | |
99 | -- characters, calling the given Out_Char procedure for each. | |
100 | -- Constraint_Error is raised if WC is not a valid value for the given | |
101 | -- encoding method EM. | |
c80d4855 RD |
102 | -- |
103 | -- Note on brackets encoding (WCEM_Brackets). For the input routines above, | |
104 | -- upper half characters can be represented as ["hh"] but this procedure | |
105 | -- will only use brackets encodings for codes higher than 16#FF#, so upper | |
106 | -- half characters will be output as single Character values. | |
cacbc350 | 107 | |
82c80734 RD |
108 | generic |
109 | with procedure Out_Char (C : Character); | |
110 | procedure UTF_32_To_Char_Sequence | |
111 | (Val : UTF_32_Code; | |
112 | EM : System.WCh_Con.WC_Encoding_Method); | |
113 | -- This is similar to Wide_Char_To_Char_Sequence, but the input value is a | |
114 | -- code from the full UTF_32 code set, which covers the full range of | |
115 | -- possible values in Wide_Wide_Character. To convert a Wide_Wide_Character | |
116 | -- value, the caller can use Wide_Wide_Character'Pos in the call. | |
117 | ||
cacbc350 | 118 | end System.WCh_Cnv; |