]> git.ipfire.org Git - thirdparty/git.git/commitdiff
userdiff-cpp: tighten word regex
authorJohannes Sixt <j6t@kdbg.org>
Fri, 8 Oct 2021 19:09:55 +0000 (19:09 +0000)
committerJunio C Hamano <gitster@pobox.com>
Fri, 8 Oct 2021 20:04:07 +0000 (13:04 -0700)
Generally, word regex can be written such that they match tokens
liberally and need not model the actual syntax because it can be assumed
that the regex will only be applied to syntactically correct text.

The regex for cpp (C/C++) is too liberal, though. It regards these
sequences as single tokens:

   1+2
   1.5-e+2+f

and the following amalgams as one token:

   .l      as in str.length
   .f      as in str.find
   .e      as in str.erase

Tighten the regex in the following way:

- Accept + and - only in one position in the exponent. + and - are no
  longer regarded as the sign of a number and are treated by the
  catcher-all that is not visible in the driver's regex.

- Accept a leading decimal point only when it is followed by a digit.

For readability, factor hex- and binary numbers into an own term.

As a drive-by, this fixes that floating point numbers such as 12E5
(with upper-case E) were split into two tokens.

Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
t/t4034/cpp/expect
userdiff.c

index 63e53a61e6222bd6747e252eb40549cc837915d8..46c9460a968e47402bd3dcee54810c9a3c45c342 100644 (file)
@@ -3,24 +3,24 @@
 <BOLD>--- a/pre<RESET>
 <BOLD>+++ b/post<RESET>
 <CYAN>@@ -1,30 +1,30 @@<RESET>
-Foo() : x(0<RED>&&1<RESET><GREEN>&42<RESET>) { <RED>foo0<RESET><GREEN>bar<RESET>(x<RED>.f<RESET><GREEN>.F<RESET>ind); }
+Foo() : x(0<RED>&&1<RESET><GREEN>&42<RESET>) { <RED>foo0<RESET><GREEN>bar<RESET>(x.<RED>find<RESET><GREEN>Find<RESET>); }
 cout<<"Hello World<RED>!<RESET><GREEN>?<RESET>\n"<<endl;
-<GREEN>(<RESET>1 <RED>-1e10<RESET><GREEN>+1e10<RESET> 0xabcdef<GREEN>)<RESET> '<RED>x<RESET><GREEN>y<RESET>'
+<GREEN>(<RESET>1 <RED>-<RESET><GREEN>+<RESET>1e10 0xabcdef<GREEN>)<RESET> '<RED>x<RESET><GREEN>y<RESET>'
 // long double<RESET>
 <RED>3.141592653e-10l<RESET><GREEN>3.141592654e+10l<RESET>
 // float<RESET>
-120<RED>E5f<RESET><GREEN>E6f<RESET>
+<RED>120E5f<RESET><GREEN>120E6f<RESET>
 // hex<RESET>
-<RED>0xdeadbeaf+8<RESET><GREEN>0xdeadBeaf+7<RESET>ULL
+<RED>0xdeadbeaf<RESET><GREEN>0xdeadBeaf<RESET>+<RED>8ULL<RESET><GREEN>7ULL<RESET>
 // octal<RESET>
 <RED>01234567<RESET><GREEN>01234560<RESET>
 // binary<RESET>
 <RED>0b1000<RESET><GREEN>0b1100<RESET>+e1
 // expression<RESET>
-<RED>1.5-e+2+f<RESET><GREEN>1.5-e+3+f<RESET>
+1.5-e+<RED>2<RESET><GREEN>3<RESET>+f
 // another one<RESET>
-str<RED>.e+65<RESET><GREEN>.e+75<RESET>
-[a] b<RED>-><RESET><GREEN>->*<RESET>v d<RED>.e<RESET><GREEN>.*e<RESET>
+str.e+<RED>65<RESET><GREEN>75<RESET>
+[a] b<RED>-><RESET><GREEN>->*<RESET>v d<RED>.<RESET><GREEN>.*<RESET>e
 <GREEN>~<RESET>!a <GREEN>!<RESET>~b c<RED>++<RESET><GREEN>+<RESET> d<RED>--<RESET><GREEN>-<RESET> e*<GREEN>*<RESET>f g<RED>&<RESET><GREEN>&&<RESET>h
 a<RED>*<RESET><GREEN>*=<RESET>b c<RED>/<RESET><GREEN>/=<RESET>d e<RED>%<RESET><GREEN>%=<RESET>f
 a<RED>+<RESET><GREEN>++<RESET>b c<RED>-<RESET><GREEN>--<RESET>d
@@ -30,6 +30,6 @@ a<RED>==<RESET><GREEN>!=<RESET>b c<RED>!=<RESET><GREEN>=<RESET>d
 a<RED>^<RESET><GREEN>^=<RESET>b c<RED>|<RESET><GREEN>|=<RESET>d e<RED>&&<RESET><GREEN>&=<RESET>f
 a<RED>||<RESET><GREEN>|<RESET>b
 a?<GREEN>:<RESET>b
-a<RED>=<RESET><GREEN>==<RESET>b c<RED>+=<RESET><GREEN>+<RESET>d <RED>e-=f<RESET><GREEN>e-f<RESET> g<RED>*=<RESET><GREEN>*<RESET>h i<RED>/=<RESET><GREEN>/<RESET>j k<RED>%=<RESET><GREEN>%<RESET>l m<RED><<=<RESET><GREEN><<<RESET>n o<RED>>>=<RESET><GREEN>>><RESET>p q<RED>&=<RESET><GREEN>&<RESET>r s<RED>^=<RESET><GREEN>^<RESET>t u<RED>|=<RESET><GREEN>|<RESET>v
+a<RED>=<RESET><GREEN>==<RESET>b c<RED>+=<RESET><GREEN>+<RESET>d e<RED>-=<RESET><GREEN>-<RESET>f g<RED>*=<RESET><GREEN>*<RESET>h i<RED>/=<RESET><GREEN>/<RESET>j k<RED>%=<RESET><GREEN>%<RESET>l m<RED><<=<RESET><GREEN><<<RESET>n o<RED>>>=<RESET><GREEN>>><RESET>p q<RED>&=<RESET><GREEN>&<RESET>r s<RED>^=<RESET><GREEN>^<RESET>t u<RED>|=<RESET><GREEN>|<RESET>v
 a,b<RESET>
 a<RED>::<RESET><GREEN>:<RESET>b
index af02b1878c7d0128a2b650d03c050168e9ac7a3e..8b49194f5619fd4d2df0c454623686cf40c3d1ce 100644 (file)
@@ -64,8 +64,14 @@ PATTERNS("cpp",
         /* functions/methods, variables, and compounds at top level */
         "^((::[[:space:]]*)?[A-Za-z_].*)$",
         /* -- */
+        /* identifiers and keywords */
         "[a-zA-Z_][a-zA-Z0-9_]*"
-        "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lLuU]*"
+        /* decimal and octal integers as well as floatingpoint numbers */
+        "|[0-9][0-9.]*([Ee][-+]?[0-9]+)?[fFlLuU]*"
+        /* hexadecimal and binary integers */
+        "|0[xXbB][0-9a-fA-F]+[lLuU]*"
+        /* floatingpoint numbers that begin with a decimal point */
+        "|\\.[0-9]+([Ee][-+]?[0-9]+)?[fFlL]?"
         "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*"),
 PATTERNS("csharp",
         /* Keywords */