syntax error (OverflowError and ValueError can be produced by
malformed literals).
-Approach:
-
-First, check if the source consists entirely of blank lines and
-comments; if so, replace it with 'pass', because the built-in
-parser doesn't always do the right thing for these.
-
-Compile three times: as is, with \n, and with \n\n appended. If it
-compiles as is, it's complete. If it compiles with one \n appended,
-we expect more. If it doesn't compile either way, we compare the
-error we get when compiling with \n or \n\n appended. If the errors
-are the same, the code is broken. But if the errors are different, we
-expect more. Not intuitive; not even guaranteed to hold in future
-releases; but this matches the compiler's behavior from Python 1.4
-through 2.2, at least.
-
-Caveat:
-
-It is possible (but not likely) that the parser stops parsing with a
-successful outcome before reaching the end of the source; in this
-case, trailing symbols may be ignored instead of causing an error.
-For example, a backslash followed by two newlines may be followed by
-arbitrary garbage. This will be fixed once the API for the parser is
-better.
-
The two interfaces are:
compile_command(source, filename, symbol):
__all__ = ["compile_command", "Compile", "CommandCompiler"]
-PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h.
+# The following flags match the values from Include/cpython/compile.h
+# Caveat emptor: These flags are undocumented on purpose and depending
+# on their effect outside the standard library is **unsupported**.
+PyCF_DONT_IMPLY_DEDENT = 0x200
+PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000
def _maybe_compile(compiler, source, filename, symbol):
# Check for source consisting of only blank lines and comments.
with warnings.catch_warnings():
warnings.simplefilter("error")
- code1 = err1 = err2 = None
- try:
- code1 = compiler(source + "\n", filename, symbol)
- except SyntaxError as e:
- err1 = e
-
try:
- code2 = compiler(source + "\n\n", filename, symbol)
+ compiler(source + "\n", filename, symbol)
except SyntaxError as e:
- err2 = e
-
- try:
- if not code1 and _is_syntax_error(err1, err2):
- raise err1
- else:
- return None
- finally:
- err1 = err2 = None
+ if "incomplete input" in str(e):
+ return None
+ raise
def _is_syntax_error(err1, err2):
rep1 = repr(err1)
return False
def _compile(source, filename, symbol):
- return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT)
+ return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT)
def compile_command(source, filename="<input>", symbol="single"):
r"""Compile a command and determine whether it is incomplete.
statement, it "remembers" and compiles all subsequent program texts
with the statement in force."""
def __init__(self):
- self.flags = PyCF_DONT_IMPLY_DEDENT
+ self.flags = PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT
def __call__(self, source, filename, symbol):
codeob = compile(source, filename, symbol, self.flags, True)
if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
parser_flags |= PyPARSE_ASYNC_HACKS;
}
+ if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
+ parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
+ }
return parser_flags;
}
p->tok->interactive_underflow = IUNDERFLOW_STOP;
}
+static inline int
+_is_end_of_source(Parser *p) {
+ int err = p->tok->done;
+ return err == E_EOF || err == E_EOFS || err == E_EOLS;
+}
+
void *
_PyPegen_run_parser(Parser *p)
{
void *res = _PyPegen_parse(p);
assert(p->level == 0);
if (res == NULL) {
+ if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
+ PyErr_Clear();
+ return RAISE_SYNTAX_ERROR("incomplete input");
+ }
if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
return NULL;
}
- // Make a second parser pass. In this pass we activate heavier and slower checks
+ // Make a second parser pass. In this pass we activate heavier and slower checks
// to produce better error messages and more complete diagnostics. Extra "invalid_*"
// rules will be active during parsing.
Token *last_token = p->tokens[p->fill - 1];
static struct tok_state *tok_new(void);
static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);
-
+static int syntaxerror(struct tok_state *tok, const char *format, ...);
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
tokenizing. */
if (tok->cur != tok->inp) {
return Py_CHARMASK(*tok->cur++); /* Fast path */
}
- if (tok->done != E_OK)
- return EOF;
+ if (tok->done != E_OK) {
+ return EOF;
+ }
if (tok->fp == NULL) {
rc = tok_underflow_string(tok);
}
tok->line_start = tok->multi_line_start;
int start = tok->lineno;
tok->lineno = tok->first_lineno;
-
if (quote_size == 3) {
- return syntaxerror(tok,
- "unterminated triple-quoted string literal"
- " (detected at line %d)", start);
+ syntaxerror(tok, "unterminated triple-quoted string literal"
+ " (detected at line %d)", start);
+ if (c != '\n') {
+ tok->done = E_EOFS;
+ }
+ return ERRORTOKEN;
}
else {
- return syntaxerror(tok,
- "unterminated string literal (detected at"
- " line %d)", start);
+ syntaxerror(tok, "unterminated string literal (detected at"
+ " line %d)", start);
+ if (c != '\n') {
+ tok->done = E_EOLS;
+ }
+ return ERRORTOKEN;
}
}
if (c == quote) {