]>
git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/rust/lex/rust-input-source.h
03bf43b5b3b480afe2c8ab74d1d368bfbcf1c351
// Copyright (C) 2020-2023 Free Software Foundation, Inc.

// This file is part of GCC.

// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.

// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.

// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

19 #ifndef RUST_INPUT_SOURCE_H
20 #define RUST_INPUT_SOURCE_H
22 #include "rust-codepoint.h"
// The three bytes, in stream order, of the UTF-8 encoding of U+FEFF (the
// byte order mark).  The decoder compares the bytes of a three-byte
// sequence against these to recognise and skip a BOM.
constexpr uint8_t UTF8_BOM1 = 0xEF;
constexpr uint8_t UTF8_BOM2 = 0xBB;
constexpr uint8_t UTF8_BOM3 = 0xBF;

31 // Input source wrapper thing.
35 // position of current character
37 std::vector
<Codepoint
> chars
;
40 // Overload operator () to return next char from input stream.
41 virtual int next_byte () = 0;
43 Codepoint
next_codepoint ()
45 uint32_t input
= next_byte ();
47 if ((int32_t) input
== EOF
)
48 return Codepoint::eof ();
49 else if (input
<= MAX_ASCII_CODEPOINT
)
54 else if ((input
& 0xC0) == 0x80)
56 // invalid (continuation; can't be first char)
57 return {CODEPOINT_INVALID
};
59 else if ((input
& 0xE0) == 0xC0)
62 uint8_t input2
= next_byte ();
63 if ((input2
& 0xC0) != 0x80)
64 return {CODEPOINT_INVALID
};
66 uint32_t output
= ((input
& 0x1F) << 6) | ((input2
& 0x3F) << 0);
69 else if ((input
& 0xF0) == 0xE0)
71 // 3 bytes or UTF-8 BOM
72 uint8_t input2
= next_byte ();
73 // If the second byte is equal to 0xBB then the input is no longer a
74 // valid UTF-8 char. Then, we check if the third byte makes up a UTF
76 if (input
== UTF8_BOM1
&& input2
== UTF8_BOM2
)
78 uint8_t input3
= next_byte ();
79 if (input3
== UTF8_BOM3
)
81 return next_codepoint ();
83 return {CODEPOINT_INVALID
};
86 if ((input2
& 0xC0) != 0x80)
87 return {CODEPOINT_INVALID
};
89 uint8_t input3
= next_byte ();
91 if ((input3
& 0xC0) != 0x80)
92 return {CODEPOINT_INVALID
};
94 uint32_t output
= ((input
& 0x0F) << 12) | ((input2
& 0x3F) << 6)
95 | ((input3
& 0x3F) << 0);
98 else if ((input
& 0xF8) == 0xF0)
101 uint8_t input2
= next_byte ();
102 if ((input2
& 0xC0) != 0x80)
103 return {CODEPOINT_INVALID
};
105 uint8_t input3
= next_byte ();
106 if ((input3
& 0xC0) != 0x80)
107 return {CODEPOINT_INVALID
};
109 uint8_t input4
= next_byte ();
110 if ((input4
& 0xC0) != 0x80)
111 return {CODEPOINT_INVALID
};
113 uint32_t output
= ((input
& 0x07) << 18) | ((input2
& 0x3F) << 12)
114 | ((input3
& 0x3F) << 6) | ((input4
& 0x3F) << 0);
119 return {CODEPOINT_INVALID
};
124 // This method must be called by the constructor to initialize the input
125 // source. We cannot move this to the constructor because it calls a
129 // Check if the input source is valid as utf-8 and copy all characters to
131 Codepoint char32
= next_codepoint ();
132 while (!char32
.is_eof () && char32
!= CODEPOINT_INVALID
)
134 chars
.push_back (char32
);
135 char32
= next_codepoint ();
138 if (char32
== CODEPOINT_INVALID
)
140 // Input source is not valid as utf-8.
141 is_valid_utf8
= false;
146 InputSource () : pos (0), chars ({}), is_valid_utf8 (true) {}
148 virtual ~InputSource () {}
150 // Checks if input source is a valid UTF-8 string
151 bool is_valid () { return is_valid_utf8
; }
153 // get the next UTF-8 character
156 if (pos
>= chars
.size ())
157 return Codepoint::eof ();
160 Codepoint c
= chars
[pos
];
166 // Returns codepoint if input source is a valid UTF-8 string. Returns
167 // nullopt otherwise.
168 tl::optional
<std::vector
<Codepoint
>> get_chars ()
177 class FileInputSource
: public InputSource
180 // Input source file.
183 int next_byte () override
{ return fgetc (input
); }
186 // Create new input source from file.
187 FileInputSource (FILE *input
) : InputSource (), input (input
) { init (); }
190 class BufferInputSource
: public InputSource
193 const std::string
&buffer
;
196 int next_byte () override
198 if (offs
>= buffer
.size ())
200 return static_cast<uint8_t> (buffer
.at (offs
++));
204 // Create new input source from file.
205 BufferInputSource (const std::string
&b
, size_t offset
)
206 : InputSource (), buffer (b
), offs (offset
)