git.ipfire.org Git - thirdparty/vala.git/commitdiff
Genie: Added Regex literals
author: Jamie McCracken <jamie.mccrack gmail com>
Mon, 24 May 2010 19:09:54 +0000 (15:09 -0400)
committer: Jamie McCracken <jamie.mccrack gmail com>
Mon, 24 May 2010 19:26:23 +0000 (15:26 -0400)
vala/valagenieparser.vala
vala/valageniescanner.vala
vala/valagenietokentype.vala

index 18bf14601a6fb53e7172648725df6e5939fda28a..d4d01b96cad3152690ab60ddbf8d1ffb2cde52c9 100644 (file)
@@ -347,6 +347,13 @@ public class Vala.Genie.Parser : CodeVisitor {
                                Report.error (lit.source_reference, "invalid character literal");
                        }
                        return lit;
+               case TokenType.REGEX_LITERAL:
+                       next ();
+                       string match_part = get_last_string ();
+                       SourceReference src_begin = get_src (begin);
+                       expect (TokenType.CLOSE_REGEX_LITERAL);
+                       string close_token = get_last_string ();
+                       return new RegexLiteral ("%s/%s".printf (close_token, match_part), src_begin);  
                case TokenType.STRING_LITERAL:
                        next ();
                        return new StringLiteral (get_last_string (), get_src (begin));
@@ -444,6 +451,15 @@ public class Vala.Genie.Parser : CodeVisitor {
                accept (TokenType.INTERR);
                accept (TokenType.HASH);
        }
+       
+       
+       /**
+        * Parses a regex literal: consumes the opening token, then hands over to parse_literal.
+        */
+       Expression parse_regex_literal () throws ParseError {
+               expect (TokenType.OPEN_REGEX_LITERAL);
+               return parse_literal ();
+       }
 
        DataType parse_type (bool owned_by_default = true) throws ParseError {
                var begin = get_location ();
@@ -642,6 +658,7 @@ public class Vala.Genie.Parser : CodeVisitor {
                case TokenType.INTEGER_LITERAL:
                case TokenType.REAL_LITERAL:
                case TokenType.CHARACTER_LITERAL:
+               case TokenType.REGEX_LITERAL:
                case TokenType.STRING_LITERAL:
                case TokenType.TEMPLATE_STRING_LITERAL:
                case TokenType.VERBATIM_STRING_LITERAL:
@@ -656,6 +673,9 @@ public class Vala.Genie.Parser : CodeVisitor {
                case TokenType.OPEN_PARENS:
                        expr = parse_tuple ();
                        break;
+               case TokenType.OPEN_REGEX_LITERAL:
+                       expr = parse_regex_literal ();
+                       break;
                case TokenType.OPEN_TEMPLATE:
                        expr = parse_template ();
                        break;
@@ -1227,6 +1247,7 @@ public class Vala.Genie.Parser : CodeVisitor {
                                        case TokenType.INTEGER_LITERAL:
                                        case TokenType.REAL_LITERAL:
                                        case TokenType.CHARACTER_LITERAL:
+                                       case TokenType.REGEX_LITERAL:
                                        case TokenType.STRING_LITERAL:
                                        case TokenType.TEMPLATE_STRING_LITERAL:
                                        case TokenType.VERBATIM_STRING_LITERAL:
index 5c2bb6d524d25078c289aae3e36cf7ade51555df..87e9b457b13975cda40a5d37503e3710106a6344 100644 (file)
@@ -34,7 +34,7 @@ public class Vala.Genie.Scanner {
        char* begin;
        char* current;
        char* end;
-
+       
        int line;
        int column;
 
@@ -65,6 +65,7 @@ public class Vala.Genie.Scanner {
                PARENS,
                BRACE,
                BRACKET,
+               REGEX_LITERAL,
                TEMPLATE,
                TEMPLATE_PART
        }
@@ -104,6 +105,176 @@ public class Vala.Genie.Scanner {
                return (c.isalnum () || c == '_');
        }
        
+       /**
+        * Reports whether the scanner is currently inside a regex literal.
+        *
+        * @return true when state_stack is non-empty and its top entry is State.REGEX_LITERAL
+        */
+       bool in_regex_literal () {
+               int depth = state_stack.length;
+               if (depth <= 0) {
+                       return false;
+               }
+               return state_stack[depth - 1] == State.REGEX_LITERAL;
+       }
+
+
+       /**
+        * Scans the next token while the scanner is inside a regex literal.
+        *
+        * Produces REGEX_LITERAL for the pattern text up to (not including) the
+        * closing `/`, CLOSE_REGEX_LITERAL for the closing `/` together with any
+        * trailing i/s/m/x modifiers, or EOF when the input is exhausted. If the
+        * pattern is not terminated before a newline or the end of input, an
+        * error is reported, the regex state is popped and scanning falls back
+        * to read_token.
+        *
+        * @param token_begin receives the location of the first character of the token
+        * @param token_end receives the location of the last character of the token
+        * @return the type of the token that was read
+        */
+       public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
+               TokenType type;
+               char* begin = current;
+               token_begin.pos = begin;
+               token_begin.line = line;
+               token_begin.column = column;
+
+               // stays negative for tokens made only of single-byte characters,
+               // whose width is then taken from the number of bytes consumed
+               int token_length_in_chars = -1;
+
+               if (current >= end) {
+                       type = TokenType.EOF;
+               } else {
+                       switch (current[0]) {
+                       case '/':
+                               type = TokenType.CLOSE_REGEX_LITERAL;
+                               current++;
+                               // the literal is finished; leave the regex state
+                               state_stack.length--;
+                               // one bit per modifier already seen: i = 1, s = 2, m = 4, x = 8
+                               int seen_modifiers = 0;
+                               while (current < end) {
+                                       int modifier_bit = 0;
+                                       switch (current[0]) {
+                                       case 'i':
+                                               modifier_bit = 1;
+                                               break;
+                                       case 's':
+                                               modifier_bit = 2;
+                                               break;
+                                       case 'm':
+                                               modifier_bit = 4;
+                                               break;
+                                       case 'x':
+                                               modifier_bit = 8;
+                                               break;
+                                       }
+                                       if (modifier_bit == 0) {
+                                               // not a modifier character: the closing token ends here
+                                               break;
+                                       }
+                                       if ((seen_modifiers & modifier_bit) != 0) {
+                                               // point at the repeated modifier itself
+                                               int modifier_column = column + (int) (current - begin);
+                                               Report.error (new SourceReference (source_file, line, modifier_column, line, modifier_column), "modifier '%c' used more than once".printf (current[0]));
+                                       }
+                                       seen_modifiers |= modifier_bit;
+                                       // token_length_in_chars is deliberately left negative so that
+                                       // the column advances by the full `/` + modifiers byte count
+                                       current++;
+                               }
+                               break;
+                       default:
+                               type = TokenType.REGEX_LITERAL;
+                               token_length_in_chars = 0;
+                               while (current < end && current[0] != '/') {
+                                       if (current[0] == '\\') {
+                                               current++;
+                                               token_length_in_chars++;
+                                               if (current >= end) {
+                                                       break;
+                                               }
+
+                                               switch (current[0]) {
+                                               case '\'':
+                                               case '"':
+                                               case '\\':
+                                               case '/':
+                                               case '^':
+                                               case '$':
+                                               case '.':
+                                               case '[':
+                                               case ']':
+                                               case '{':
+                                               case '}':
+                                               case '(':
+                                               case ')':
+                                               case '?':
+                                               case '*':
+                                               case '+':
+                                               case '-':
+                                               case '#':
+                                               case '&':
+                                               case '~':
+                                               case ':':
+                                               case ';':
+                                               case '<':
+                                               case '>':
+                                               case '|':
+                                               case '%':
+                                               case '=':
+                                               case '@':
+                                               case '0':
+                                               case 'b':
+                                               case 'B':
+                                               case 'f':
+                                               case 'n':
+                                               case 'r':
+                                               case 't':
+                                               case 'a':
+                                               case 'A':
+                                               case 'p':
+                                               case 'P':
+                                               case 'e':
+                                               case 'd':
+                                               case 'D':
+                                               case 's':
+                                               case 'S':
+                                               case 'w':
+                                               case 'W':
+                                               case 'G':
+                                               case 'z':
+                                               case 'Z':
+                                                       current++;
+                                                       token_length_in_chars++;
+                                                       break;
+                                               case 'x':
+                                                       // hexadecimal escape character
+                                                       current++;
+                                                       token_length_in_chars++;
+                                                       while (current < end && current[0].isxdigit ()) {
+                                                               current++;
+                                                               token_length_in_chars++;
+                                                       }
+                                                       break;
+                                               default:
+                                                       // the offending character is not consumed here; the next
+                                                       // loop iteration scans it as an ordinary character
+                                                       Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
+                                                       break;
+                                               }
+                                       } else if (current[0] == '\n') {
+                                               break;
+                                       } else {
+                                               unichar u = ((string) current).get_char_validated ((long) (end - current));
+                                               if (u != (unichar) (-1)) {
+                                                       current += u.to_utf8 (null);
+                                                       token_length_in_chars++;
+                                               } else {
+                                                       current++;
+                                                       Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
+                                               }
+                                       }
+                               }
+                               if (current >= end || current[0] == '\n') {
+                                       // ran out of line or input before the closing delimiter
+                                       Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected /");
+                                       state_stack.length--;
+                                       return read_token (out token_begin, out token_end);
+                               }
+                               break;
+                       }
+               }
+
+               if (token_length_in_chars < 0) {
+                       column += (int) (current - begin);
+               } else {
+                       column += token_length_in_chars;
+               }
+
+               token_end.pos = current;
+               token_end.line = line;
+               token_end.column = column - 1;
+
+               return type;
+       }
+
+       
        public void seek (SourceLocation location) {
                current = location.pos;
                line = location.line;
@@ -607,13 +778,10 @@ public class Vala.Genie.Scanner {
        public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
                
 
-
-               /* emit dedents if outstanding before checking any other chars */
-
-               if (pending_dedents > 0) {
-                       pending_dedents--;
-                       indent_level--;
-
+               if (in_template ()) {
+                       return read_template_token (out token_begin, out token_end);
+               } else if (in_template_part ()) {
+                       state_stack.length--;
 
                        token_begin.pos = current;
                        token_begin.line = line;
@@ -621,18 +789,21 @@ public class Vala.Genie.Scanner {
 
                        token_end.pos = current;
                        token_end.line = line;
-                       token_end.column = column;
-
-                       last_token = TokenType.DEDENT;
+                       token_end.column = column - 1;
 
-                       return TokenType.DEDENT;
+                       return TokenType.COMMA;
+               } else if (in_regex_literal ()) {
+                       return read_regex_token (out token_begin, out token_end);
                }
 
 
-               if (in_template ()) {
-                       return read_template_token (out token_begin, out token_end);
-               } else if (in_template_part ()) {
-                       state_stack.length--;
+
+               /* emit dedents if outstanding before checking any other chars */
+
+               if (pending_dedents > 0) {
+                       pending_dedents--;
+                       indent_level--;
+
 
                        token_begin.pos = current;
                        token_begin.line = line;
@@ -640,11 +811,12 @@ public class Vala.Genie.Scanner {
 
                        token_end.pos = current;
                        token_end.line = line;
-                       token_end.column = column - 1;
+                       token_end.column = column;
 
-                       return TokenType.COMMA;
-               }
+                       last_token = TokenType.DEDENT;
 
+                       return TokenType.DEDENT;
+               }
 
                if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) {
                        /* scrub whitespace (excluding newlines) and comments */                
@@ -1023,13 +1195,40 @@ public class Vala.Genie.Scanner {
                                }
                                break;
                        case '/':
-                               type = TokenType.DIV;
-                               current++;
-                               if (current < end && current[0] == '=') {
-                                       type = TokenType.ASSIGN_DIV;
+                               switch (last_token) {
+                               case TokenType.ASSIGN:
+                               case TokenType.COMMA:
+                               case TokenType.MINUS:
+                               case TokenType.OP_AND:
+                               case TokenType.OP_DEC:
+                               case TokenType.OP_EQ:
+                               case TokenType.OP_GE:
+                               case TokenType.OP_GT:
+                               case TokenType.OP_INC:
+                               case TokenType.OP_LE:
+                               case TokenType.OP_LT:
+                               case TokenType.OP_NE:
+                               case TokenType.OP_NEG:
+                               case TokenType.OP_OR:
+                               case TokenType.OPEN_BRACE:
+                               case TokenType.OPEN_PARENS:
+                               case TokenType.PLUS:
+                               case TokenType.RETURN:
+                                       type = TokenType.OPEN_REGEX_LITERAL;
+                                       state_stack += State.REGEX_LITERAL;
                                        current++;
+                                       break;
+                               default:
+                                       type = TokenType.DIV;
+                                       current++;
+                                       if (current < end && current[0] == '=') {
+                                               type = TokenType.ASSIGN_DIV;
+                                               current++;
+                                       }
+                                       break;
                                }
                                break;
+
                        case '%':
                                type = TokenType.PERCENT;
                                current++;
@@ -1152,7 +1351,6 @@ public class Vala.Genie.Scanner {
                token_end.pos = current;
                token_end.line = line;
                token_end.column = column - 1;
-               
                last_token = type;
 
                return type;
index 30ed7bac6e9ecc58d03d031a122967cd21d9439e..fb8f54f6e1875db1811405e44a53c1897cffaa52 100644 (file)
@@ -50,6 +50,7 @@ public enum Vala.Genie.TokenType {
        CLOSE_BRACE,
        CLOSE_BRACKET,
        CLOSE_PARENS,
+       CLOSE_REGEX_LITERAL,
        CLOSE_TEMPLATE,
        COLON,
        COMMA,
@@ -121,6 +122,7 @@ public enum Vala.Genie.TokenType {
        OPEN_BRACE,
        OPEN_BRACKET,
        OPEN_PARENS,
+       OPEN_REGEX_LITERAL,
        OPEN_TEMPLATE,
        OVERRIDE,
        OWNED,
@@ -138,6 +140,7 @@ public enum Vala.Genie.TokenType {
        REAL_LITERAL,
        READONLY,
        REF,
+       REGEX_LITERAL,
        REQUIRES,
        RETURN,
        SEMICOLON,
@@ -195,6 +198,7 @@ public enum Vala.Genie.TokenType {
                case CLOSE_BRACE: return "`}'";
                case CLOSE_BRACKET: return "`]'";
                case CLOSE_PARENS: return "`)'";
+               case CLOSE_REGEX_LITERAL: return "`/'";
                case COLON: return "`:'";
                case COMMA: return "`,'";
                case CONST: return "`const'";
@@ -265,6 +269,7 @@ public enum Vala.Genie.TokenType {
                case OPEN_BRACE: return "`{'";
                case OPEN_BRACKET: return "`['";
                case OPEN_PARENS: return "`('";
+               case OPEN_REGEX_LITERAL: return "`/'";
                case OVERRIDE: return "`override'";
                case OWNED: return "`owned'";
                case PARAMS: return "`params'";
@@ -281,6 +286,7 @@ public enum Vala.Genie.TokenType {
                case READONLY: return "`readonly'";
                case REAL_LITERAL: return "real literal";
                case REF: return "`ref'";
+               case REGEX_LITERAL: return "regex literal";
                case REQUIRES: return "`requires'";
                case RETURN: return "`return'";
                case SEMICOLON: return "`;'";