From: Jamie McCracken Date: Mon, 24 May 2010 19:09:54 +0000 (-0400) Subject: Genie: Added Regex literals X-Git-Tag: 0.9.1~49 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d944ecd50f3b53a657098092d5bacbfcd6abea99;p=thirdparty%2Fvala.git Genie: Added Regex literals --- diff --git a/vala/valagenieparser.vala b/vala/valagenieparser.vala index 18bf14601..d4d01b96c 100644 --- a/vala/valagenieparser.vala +++ b/vala/valagenieparser.vala @@ -347,6 +347,13 @@ public class Vala.Genie.Parser : CodeVisitor { Report.error (lit.source_reference, "invalid character literal"); } return lit; + case TokenType.REGEX_LITERAL: + next (); + string match_part = get_last_string (); + SourceReference src_begin = get_src (begin); + expect (TokenType.CLOSE_REGEX_LITERAL); + string close_token = get_last_string (); + return new RegexLiteral ("%s/%s".printf (close_token, match_part), src_begin); case TokenType.STRING_LITERAL: next (); return new StringLiteral (get_last_string (), get_src (begin)); @@ -444,6 +451,15 @@ public class Vala.Genie.Parser : CodeVisitor { accept (TokenType.INTERR); accept (TokenType.HASH); } + + + Expression parse_regex_literal () throws ParseError { + expect (TokenType.OPEN_REGEX_LITERAL); + + var expr = parse_literal (); + + return expr; + } DataType parse_type (bool owned_by_default = true) throws ParseError { var begin = get_location (); @@ -642,6 +658,7 @@ public class Vala.Genie.Parser : CodeVisitor { case TokenType.INTEGER_LITERAL: case TokenType.REAL_LITERAL: case TokenType.CHARACTER_LITERAL: + case TokenType.REGEX_LITERAL: case TokenType.STRING_LITERAL: case TokenType.TEMPLATE_STRING_LITERAL: case TokenType.VERBATIM_STRING_LITERAL: @@ -656,6 +673,9 @@ public class Vala.Genie.Parser : CodeVisitor { case TokenType.OPEN_PARENS: expr = parse_tuple (); break; + case TokenType.OPEN_REGEX_LITERAL: + expr = parse_regex_literal (); + break; case TokenType.OPEN_TEMPLATE: expr = parse_template (); break; @@ -1227,6 +1247,7 @@ public class Vala.Genie.Parser : CodeVisitor { case TokenType.INTEGER_LITERAL: case TokenType.REAL_LITERAL: case TokenType.CHARACTER_LITERAL: + case TokenType.REGEX_LITERAL: case TokenType.STRING_LITERAL: case TokenType.TEMPLATE_STRING_LITERAL: case TokenType.VERBATIM_STRING_LITERAL: diff --git a/vala/valageniescanner.vala b/vala/valageniescanner.vala index 5c2bb6d52..87e9b457b 100644 --- a/vala/valageniescanner.vala +++ b/vala/valageniescanner.vala @@ -34,7 +34,7 @@ public class Vala.Genie.Scanner { char* begin; char* current; char* end; - + int line; int column; @@ -65,6 +65,7 @@ public class Vala.Genie.Scanner { PARENS, BRACE, BRACKET, + REGEX_LITERAL, TEMPLATE, TEMPLATE_PART } @@ -104,6 +105,176 @@ public class Vala.Genie.Scanner { return (c.isalnum () || c == '_'); } + bool in_regex_literal () { + return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.REGEX_LITERAL); + } + + + public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) { + TokenType type; + char* begin = current; + token_begin.pos = begin; + token_begin.line = line; + token_begin.column = column; + + int token_length_in_chars = -1; + + if (current >= end) { + type = TokenType.EOF; + } else { + switch (current[0]) { + case '/': + type = TokenType.CLOSE_REGEX_LITERAL; + current++; + state_stack.length--; + var fl_i = false; + var fl_s = false; + var fl_m = false; + var fl_x = false; + while (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x') { + switch (current[0]) { + case 'i': + if (fl_i) { + Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'i' used more than once"); + } + fl_i = true; + break; + case 's': + if (fl_s) { + Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 's' used more than once"); + } + fl_s = true; + break; + case 'm': + if (fl_m) { + Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'm' used more than once"); + } + fl_m = true; + break; + case 'x': + if (fl_x) { + Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'x' used more than once"); + } + fl_x = true; + break; + } + current++; + token_length_in_chars++; + } + break; + default: + type = TokenType.REGEX_LITERAL; + token_length_in_chars = 0; + while (current < end && current[0] != '/') { + if (current[0] == '\\') { + current++; + token_length_in_chars++; + if (current >= end) { + break; + } + + switch (current[0]) { + case '\'': + case '"': + case '\\': + case '/': + case '^': + case '$': + case '.': + case '[': + case ']': + case '{': + case '}': + case '(': + case ')': + case '?': + case '*': + case '+': + case '-': + case '#': + case '&': + case '~': + case ':': + case ';': + case '<': + case '>': + case '|': + case '%': + case '=': + case '@': + case '0': + case 'b': + case 'B': + case 'f': + case 'n': + case 'r': + case 't': + case 'a': + case 'A': + case 'p': + case 'P': + case 'e': + case 'd': + case 'D': + case 's': + case 'S': + case 'w': + case 'W': + case 'G': + case 'z': + case 'Z': + current++; + token_length_in_chars++; + break; + case 'x': + // hexadecimal escape character + current++; + token_length_in_chars++; + while (current < end && current[0].isxdigit ()) { + current++; + token_length_in_chars++; + } + break; + default: + Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence"); + break; + } + } else if (current[0] == '\n') { + break; + } else { + unichar u = ((string) current).get_char_validated ((long) (end - current)); + if (u != (unichar) (-1)) { + current += u.to_utf8 (null); + token_length_in_chars++; + } else { + current++; + Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character"); + } + } + } + if (current >= end || current[0] == '\n') { + Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \""); + state_stack.length--; + return read_token (out token_begin, out token_end); + } + break; + } + } + + if (token_length_in_chars < 0) { + column += (int) (current - begin); + } else { + column += token_length_in_chars; + } + + token_end.pos = current; + token_end.line = line; + token_end.column = column - 1; + + return type; + } + + public void seek (SourceLocation location) { current = location.pos; line = location.line; @@ -607,13 +778,10 @@ public class Vala.Genie.Scanner { public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) { - - /* emit dedents if outstanding before checking any other chars */ - - if (pending_dedents > 0) { - pending_dedents--; - indent_level--; - + if (in_template ()) { + return read_template_token (out token_begin, out token_end); + } else if (in_template_part ()) { + state_stack.length--; token_begin.pos = current; token_begin.line = line; @@ -621,18 +789,21 @@ public class Vala.Genie.Scanner { token_end.pos = current; token_end.line = line; - token_end.column = column; - - last_token = TokenType.DEDENT; + token_end.column = column - 1; - return TokenType.DEDENT; + return TokenType.COMMA; + } else if (in_regex_literal ()) { + return read_regex_token (out token_begin, out token_end); } - if (in_template ()) { - return read_template_token (out token_begin, out token_end); - } else if (in_template_part ()) { - state_stack.length--; + + /* emit dedents if outstanding before checking any other chars */ + + if (pending_dedents > 0) { + pending_dedents--; + indent_level--; + token_begin.pos = current; token_begin.line = line; @@ -640,11 +811,12 @@ public class Vala.Genie.Scanner { token_end.pos = current; token_end.line = line; - token_end.column = column - 1; + token_end.column = column; - return TokenType.COMMA; - } + last_token = TokenType.DEDENT; + return TokenType.DEDENT; + } if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) { /* scrub whitespace (excluding newlines) and comments */ @@ -1023,13 +1195,40 @@ public class Vala.Genie.Scanner { } break; case '/': - type = TokenType.DIV; - current++; - if (current < end && current[0] == '=') { - type = TokenType.ASSIGN_DIV; + switch (last_token) { + case TokenType.ASSIGN: + case TokenType.COMMA: + case TokenType.MINUS: + case TokenType.OP_AND: + case TokenType.OP_DEC: + case TokenType.OP_EQ: + case TokenType.OP_GE: + case TokenType.OP_GT: + case TokenType.OP_INC: + case TokenType.OP_LE: + case TokenType.OP_LT: + case TokenType.OP_NE: + case TokenType.OP_NEG: + case TokenType.OP_OR: + case TokenType.OPEN_BRACE: + case TokenType.OPEN_PARENS: + case TokenType.PLUS: + case TokenType.RETURN: + type = TokenType.OPEN_REGEX_LITERAL; + state_stack += State.REGEX_LITERAL; current++; + break; + default: + type = TokenType.DIV; + current++; + if (current < end && current[0] == '=') { + type = TokenType.ASSIGN_DIV; + current++; + } + break; } break; + case '%': type = TokenType.PERCENT; current++; @@ -1152,7 +1351,6 @@ public class Vala.Genie.Scanner { token_end.pos = current; token_end.line = line; token_end.column = column - 1; - last_token = type; return type; diff --git a/vala/valagenietokentype.vala b/vala/valagenietokentype.vala index 30ed7bac6..fb8f54f6e 100644 --- a/vala/valagenietokentype.vala +++ b/vala/valagenietokentype.vala @@ -50,6 +50,7 @@ public enum Vala.Genie.TokenType { CLOSE_BRACE, CLOSE_BRACKET, CLOSE_PARENS, + CLOSE_REGEX_LITERAL, CLOSE_TEMPLATE, COLON, COMMA, @@ -121,6 +122,7 @@ public enum Vala.Genie.TokenType { OPEN_BRACE, OPEN_BRACKET, OPEN_PARENS, + OPEN_REGEX_LITERAL, OPEN_TEMPLATE, OVERRIDE, OWNED, @@ -138,6 +140,7 @@ public enum Vala.Genie.TokenType { REAL_LITERAL, READONLY, REF, + REGEX_LITERAL, REQUIRES, RETURN, SEMICOLON, @@ -195,6 +198,7 @@ public enum Vala.Genie.TokenType { case CLOSE_BRACE: return "`}'"; case CLOSE_BRACKET: return "`]'"; case CLOSE_PARENS: return "`)'"; + case CLOSE_REGEX_LITERAL: return "`/'"; case COLON: return "`:'"; case COMMA: return "`,'"; case CONST: return "`const'"; @@ -265,6 +269,7 @@ public enum Vala.Genie.TokenType { case OPEN_BRACE: return "`{'"; case OPEN_BRACKET: return "`['"; case OPEN_PARENS: return "`('"; + case OPEN_REGEX_LITERAL: return "`/'"; case OVERRIDE: return "`override'"; case OWNED: return "`owned'"; case PARAMS: return "`params'"; @@ -281,6 +286,7 @@ public enum Vala.Genie.TokenType { case READONLY: return "`readonly'"; case REAL_LITERAL: return "real literal"; case REF: return "`ref'"; + case REGEX_LITERAL: return "regex literal"; case REQUIRES: return "`requires'"; case RETURN: return "`return'"; case SEMICOLON: return "`;'";