# this won't work with compound complex inputs
continue
self.assertEqual(number_token(lit), lit)
+ # Valid cases with extra underscores in the tokenize module
+ # See gh-105549 for context
+ extra_valid_cases = {"0_7", "09_99"}
for lit in INVALID_UNDERSCORE_LITERALS:
+ if lit in extra_valid_cases:
+ continue
try:
number_token(lit)
except TokenError:
self.check_roundtrip(code)
+class InvalidPythonTests(TestCase):  # Inputs for which generate_tokens() is expected to yield tokens rather than raise.
+ def test_number_followed_by_name(self):
+ # See gh-105549: a number directly followed by a name is expected to split into NUMBER + NAME.
+ source = "2sin(x)"
+ expected_tokens = [
+ TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
+ TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
+ TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
+ TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
+ TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
+ TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),  # source has no trailing newline, so the NEWLINE token text is empty
+ TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+ ]
+
+ tokens = list(generate_tokens(StringIO(source).readline))
+ self.assertEqual(tokens, expected_tokens)  # compares every TokenInfo field, positions included
+
+ def test_number_starting_with_zero(self):
+ source = "01234"  # leading-zero literal, expected back as one NUMBER token (presumably also gh-105549 -- confirm)
+ expected_tokens = [
+ TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
+ TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),  # empty NEWLINE text: source has no trailing newline
+ TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+ ]
+
+ tokens = list(generate_tokens(StringIO(source).readline))
+ self.assertEqual(tokens, expected_tokens)  # compares every TokenInfo field, positions included
+
class CTokenizeTest(TestCase):
def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
}
static int
-verify_end_of_number(struct tok_state *tok, int c, const char *kind)
-{
+verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
+ if (tok->tok_extra_tokens) {
+ // When we are parsing extra tokens, we don't want to emit warnings
+ // about invalid literals, because we want to be a bit more liberal.
+ return 1;
+ }
/* Emit a deprecation warning only if the numeric literal is immediately
* followed by one of keywords which can occur after a numeric literal
* in valid code: "and", "else", "for", "if", "in", "is" and "or".
static int
verify_identifier(struct tok_state *tok)
{
+ if (tok->tok_extra_tokens) {
+ return 1;
+ }
PyObject *s;
if (tok->decoding_erred)
return 0;
else if (c == 'j' || c == 'J') {
goto imaginary;
}
- else if (nonzero) {
+ else if (nonzero && !tok->tok_extra_tokens) {
/* Old-style octal: now disallowed. */
tok_backup(tok, c);
return MAKE_TOKEN(syntaxerror_known_range(