]>
Commit | Line | Data |
---|---|---|
cacbc350 RK |
1 | ------------------------------------------------------------------------------ |
2 | -- -- | |
3084fecd | 3 | -- GNAT RUN-TIME COMPONENTS -- |
cacbc350 RK |
4 | -- -- |
5 | -- S Y S T E M . W C H _ C N V -- | |
6 | -- -- | |
7 | -- S p e c -- | |
8 | -- -- | |
2d9ea47f | 9 | -- Copyright (C) 1992-2007, Free Software Foundation, Inc. -- |
cacbc350 RK |
10 | -- -- |
11 | -- GNAT is free software; you can redistribute it and/or modify it under -- | |
12 | -- terms of the GNU General Public License as published by the Free Soft- -- | |
13 | -- ware Foundation; either version 2, or (at your option) any later ver- -- | |
14 | -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- | |
15 | -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- | |
16 | -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- | |
17 | -- for more details. You should have received a copy of the GNU General -- | |
18 | -- Public License distributed with GNAT; see file COPYING. If not, write -- | |
cb5fee25 KC |
19 | -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, -- |
20 | -- Boston, MA 02110-1301, USA. -- | |
cacbc350 RK |
21 | -- -- |
22 | -- As a special exception, if other files instantiate generics from this -- | |
23 | -- unit, or you link this unit with other files to produce an executable, -- | |
24 | -- this unit does not by itself cause the resulting executable to be -- | |
25 | -- covered by the GNU General Public License. This exception does not -- | |
26 | -- however invalidate any other reasons why the executable file might be -- | |
27 | -- covered by the GNU Public License. -- | |
28 | -- -- | |
29 | -- GNAT was originally developed by the GNAT team at New York University. -- | |
71ff80dc | 30 | -- Extensive contributions were provided by Ada Core Technologies Inc. -- |
cacbc350 RK |
31 | -- -- |
32 | ------------------------------------------------------------------------------ | |
33 | ||
2d9ea47f | 34 | -- This package contains generic subprograms used for converting between |
c80d4855 RD |
35 | -- sequences of Character and Wide_Character. Wide_Wide_Character values |
36 | -- are also handled, but represented using integer range types defined in | |
37 | -- this package, so that this package can be used from applications that | |
38 | -- are restricted to Ada 95 compatibility (such as the compiler itself). | |
39 | ||
40 | -- All the algorithms for encoding and decoding are isolated in this package | |
41 | -- and in System.WCh_JIS and should not be duplicated elsewhere. The only | |
42 | -- exception to this is that GNAT.Decode_String and GNAT.Encode_String have | |
43 | -- their own circuits for UTF-8 conversions, for improved efficiency. | |
2d9ea47f | 44 | |
fbf5a39b AC |
45 | -- This unit may be used directly from an application program by providing |
46 | -- an appropriate WITH, and the interface can be expected to remain stable. | |
47 | ||
2d9ea47f RD |
48 | pragma Warnings (Off); |
49 | pragma Compiler_Unit; | |
50 | pragma Warnings (On); | |
51 | ||
cacbc350 RK |
52 | with System.WCh_Con; |
53 | ||
54 | package System.WCh_Cnv is | |
009186e0 | 55 | pragma Pure; |
82c80734 RD |
56 | |
57 | type UTF_32_Code is range 0 .. 16#7FFF_FFFF#; | |
58 | for UTF_32_Code'Size use 32; | |
59 | -- Range of allowed UTF-32 encoding values | |
cacbc350 | 60 | |
c80d4855 RD |
61 | type UTF_32_String is array (Positive range <>) of UTF_32_Code; |
62 | ||
cacbc350 RK |
63 | generic |
64 | with function In_Char return Character; | |
65 | function Char_Sequence_To_Wide_Char | |
82c80734 RD |
66 | (C : Character; |
67 | EM : System.WCh_Con.WC_Encoding_Method) return Wide_Character; | |
cacbc350 RK |
68 | -- C is the first character of a sequence of one or more characters which |
69 | -- represent a wide character sequence. Calling the function In_Char for | |
70 | -- additional characters as required, Char_Sequence_To_Wide_Char returns the | |
71 | -- corresponding wide character value. Constraint_Error is raised if the | |
72 | -- sequence of characters encountered is not a valid wide character | |
73 | -- sequence for the given encoding method. | |
c80d4855 RD |
74 | -- |
75 | -- Note on the use of brackets encoding (WCEM_Brackets). The brackets | |
76 | -- encoding method is ambiguous in the context of this function, since | |
77 | -- there is no way to tell if ["1234"] is eight unencoded characters or | |
78 | -- one encoded character. In the context of Ada sources, any sequence | |
79 | -- starting [" must be the start of an encoding (since that sequence is | |
80 | -- not valid in Ada source otherwise). The routines in this package use | |
81 | -- the same approach. If the input string contains the sequence [" then | |
82 | -- this is assumed to be the start of a brackets encoding sequence, and | |
83 | -- if it does not match the syntax, an error is raised. | |
cacbc350 | 84 | |
82c80734 RD |
85 | generic |
86 | with function In_Char return Character; | |
87 | function Char_Sequence_To_UTF_32 | |
88 | (C : Character; | |
89 | EM : System.WCh_Con.WC_Encoding_Method) return UTF_32_Code; | |
90 | -- Like Char_Sequence_To_Wide_Char, but the function returns a code from | |
91 | -- the full UTF_32 code set, which covers the full range of possible | |
92 | -- values in Wide_Wide_Character. The result can be converted to | |
93 | -- Wide_Wide_Character form using Wide_Wide_Character'Val. | |
94 | ||
cacbc350 RK |
95 | generic |
96 | with procedure Out_Char (C : Character); | |
97 | procedure Wide_Char_To_Char_Sequence | |
98 | (WC : Wide_Character; | |
99 | EM : System.WCh_Con.WC_Encoding_Method); | |
100 | -- Given a wide character, converts it into a sequence of one or | |
101 | -- more characters, calling the given Out_Char procedure for each. | |
102 | -- Constraint_Error is raised if the given wide character value is | |
103 | -- not a valid value for the given encoding method. | |
c80d4855 RD |
104 | -- |
105 | -- Note on brackets encoding (WCEM_Brackets). For the input routines above, | |
106 | -- upper half characters can be represented as ["hh"] but this procedure | |
107 | -- will only use brackets encodings for codes higher than 16#FF#, so upper | |
108 | -- half characters will be output as single Character values. | |
cacbc350 | 109 | |
82c80734 RD |
110 | generic |
111 | with procedure Out_Char (C : Character); | |
112 | procedure UTF_32_To_Char_Sequence | |
113 | (Val : UTF_32_Code; | |
114 | EM : System.WCh_Con.WC_Encoding_Method); | |
115 | -- Like Wide_Char_To_Char_Sequence, but the input value is a code from the | |
116 | -- full UTF_32 code set, which covers the full range of possible values | |
117 | -- in Wide_Wide_Character. To convert a Wide_Wide_Character value, the | |
118 | -- caller can use Wide_Wide_Character'Pos in the call. | |
119 | ||
cacbc350 | 120 | end System.WCh_Cnv; |