git.ipfire.org Git - thirdparty/vim.git/commitdiff
runtime(sed): Update syntax, support more GNU address extensions
author: Doug Kearns <dougkearns@gmail.com>
Sat, 7 Mar 2026 10:11:39 +0000 (10:11 +0000)
committer: Christian Brabandt <cb@256bit.org>
Sat, 7 Mar 2026 10:13:42 +0000 (10:13 +0000)
- Support all GNU address extensions.
- Fix some bugs related to erroneous matching of pattern delimiters in
  bracket expressions.

closes: #19587

Signed-off-by: Doug Kearns <dougkearns@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
runtime/syntax/sed.vim
runtime/syntax/testdir/dumps/sed_00.dump [new file with mode: 0644]
runtime/syntax/testdir/dumps/sed_01.dump [new file with mode: 0644]
runtime/syntax/testdir/dumps/sed_02.dump [new file with mode: 0644]
runtime/syntax/testdir/dumps/sed_03.dump [new file with mode: 0644]
runtime/syntax/testdir/input/sed.sed [new file with mode: 0644]

index d1f631df4b889c28b3c85df8becab06798d3e3f1..da6c9f85f6be70f32b47f660dafe8421ef767944 100644 (file)
@@ -3,7 +3,7 @@
 " Maintainer:          Doug Kearns <dougkearns@gmail.com>
 " Previous Maintainer: Haakon Riiser <hakonrk@fys.uio.no>
 " Contributor:         Jack Haden-Enneking
-" Last Change:         2022 Oct 15
+" Last Change:         2026 Mar 06
 
 " quit when a syntax file was already loaded
 if exists("b:current_syntax")
@@ -16,10 +16,31 @@ syn match sedError  "\S"
 
 syn match sedWhitespace "\s\+" contained
 syn match sedSemicolon ";"
-syn match sedAddress   "[[:digit:]$]"
+
+" Addresses {{{1
+syn match sedAddress   "\d\+\|\$"
+
+" GNU extensions
 syn match sedAddress   "\d\+\~\d\+"
-syn region sedAddress  matchgroup=Special start="[{,;]\s*/\%(\\/\)\="lc=1 skip="[^\\]\%(\\\\\)*\\/" end="/I\=" contains=sedTab,sedRegexpMeta
-syn region sedAddress  matchgroup=Special start="^\s*/\%(\\/\)\=" skip="[^\\]\%(\\\\\)*\\/" end="/I\=" contains=sedTab,sedRegexpMeta
+syn match sedAddress   "\~\d\+"
+syn match sedAddress   "[-+]\d\+"
+
+syn region sedAddress
+      \ matchgroup=Delimiter
+      \ start="[{,;]\s*/\%(\\/\)\="lc=1
+      \ skip="[^\\]\%(\\\\\)*\\/"
+      "\ GNU extensions
+      \ end="/\%(IM\|MI\|[IM]\)\="
+      \ contains=sedTab,sedRegexpMeta
+syn region sedAddress
+      \ matchgroup=Delimiter
+      \ start="^\s*/\%(\\/\)\="
+      \ skip="[^\\]\%(\\\\\)*\\/"
+      "\ GNU extensions
+      \ end="/\%(IM\|MI\|[IM]\)\="
+      \ contains=sedTab,sedRegexpMeta
+" }}}
+
 syn match sedFunction  "[dDgGhHlnNpPqQx=]\s*\%($\|;\)" contains=sedSemicolon,sedWhitespace
 if exists("g:sed_dialect") && g:sed_dialect ==? "bsd"
   syn match sedComment "^\s*#.*$" contains=sedTodo
@@ -50,7 +71,7 @@ syn region sedFlagWrite           matchgroup=sedFlag start="w" matchgroup=sedSemicolon
 syn match sedFlag          "[[:digit:]gpI]*w\=" contains=sedFlagWrite contained
 syn match sedRegexpMeta            "[.*^$]" contained
 syn match sedRegexpMeta            "\\." contains=sedTab contained
-syn match sedRegexpMeta            "\[.\{-}\]" contains=sedTab contained
+syn match sedRegexpMeta            "\[\^\=\]\=\%(\[:.\{-}:\]\|\[\..\{-}\.\]\|\[=.\{-}=\]\|[^]]\)*\]" contains=sedTab contained
 syn match sedRegexpMeta            "\\{\d\*,\d*\\}" contained
 syn match sedRegexpMeta            "\\%(.\{-}\\)" contains=sedTab contained
 syn match sedReplaceMeta    "&\|\\\%($\|.\)" contains=sedTab contained
@@ -68,15 +89,44 @@ let s:metacharacters = '$*.\^[~'
 while s:i <= s:last
   let s:delimiter = escape(nr2char(s:i), s:metacharacters)
   if s:i != s:at
-    exe 'syn region sedAddress matchgroup=Special start=@\\'.s:delimiter.'\%(\\'.s:delimiter.'\)\=@ skip=@[^\\]\%(\\\\\)*\\'.s:delimiter.'@ end=@'.s:delimiter.'[IM]\=@ contains=sedTab'
-    exe 'syn region sedRegexp'.s:i  'matchgroup=Special start=@'.s:delimiter.'\%(\\\\\|\\'.s:delimiter.'\)*@ skip=@[^\\'.s:delimiter.']\%(\\\\\)*\\'.s:delimiter.'@ end=@'.s:delimiter.'@me=e-1 contains=sedTab,sedRegexpMeta keepend contained nextgroup=sedReplacement'.s:i
-    exe 'syn region sedReplacement'.s:i 'matchgroup=Special start=@'.s:delimiter.'\%(\\\\\|\\'.s:delimiter.'\)*@ skip=@[^\\'.s:delimiter.']\%(\\\\\)*\\'.s:delimiter.'@ end=@'.s:delimiter.'@ contains=sedTab,sedReplaceMeta keepend contained nextgroup=@sedFlags'
+    exe 'syn region sedAddress'
+         \ 'matchgroup=Delimiter'
+         \ 'start=@\\' .. s:delimiter .. '\%(\\' .. s:delimiter .. '\)\=@'
+         \ 'skip=@[^\\]\%(\\\\\)*\\' .. s:delimiter .. '\|\[.\{-}' .. s:delimiter .. '@'
+         \ 'end=@' .. s:delimiter .. '\%(IM\|MI\|[IM]\)\=@'
+         \ 'contains=sedTab,sedRegexpMeta'
+    exe 'syn region sedRegexp' .. s:i 'contained'
+         \ 'matchgroup=Delimiter'
+         \ 'start=@' .. s:delimiter .. '\%(\\\\\|\\' .. s:delimiter .. '\)*@'
+         \ 'end=@' .. s:delimiter .. '@me=e-1'
+         \ 'nextgroup=sedReplacement' .. s:i
+         \ 'contains=sedTab,sedRegexpMeta'
+    exe 'syn region sedReplacement' .. s:i 'contained'
+         \ 'matchgroup=Delimiter'
+         \ 'start=@' .. s:delimiter .. '\%(\\\\\|\\' .. s:delimiter .. '\)*@'
+         \ 'end=@' .. s:delimiter .. '@'
+         \ 'nextgroup=@sedFlags'
+         \ 'contains=sedTab,sedReplaceMeta'
   endif
   let s:i = s:i + 1
 endwhile
-syn region sedAddress matchgroup=Special start=+\\@\%(\\@\)\=+ skip=+[^\\]\%(\\\\\)*\\@+ end=+@I\=+ contains=sedTab,sedRegexpMeta
-syn region sedRegexp64 matchgroup=Special start=+@\%(\\\\\|\\@\)*+ skip=+[^\\@]\%(\\\\\)*\\@+ end=+@+me=e-1 contains=sedTab,sedRegexpMeta keepend contained nextgroup=sedReplacement64
-syn region sedReplacement64 matchgroup=Special start=+@\%(\\\\\|\\@\)*+ skip=+[^\\@]\%(\\\\\)*\\@+ end=+@+ contains=sedTab,sedReplaceMeta keepend contained nextgroup=sedFlag
+syn region sedAddress
+      \ matchgroup=Delimiter
+      \ start=+\\\z(@\)+
+      \ end=+\z1\%(IM\|MI\|[IM]\)\=+
+      \ contains=sedTab,sedRegexpMeta
+syn region sedRegexp64 contained
+      \ matchgroup=Delimiter
+      \ start=+@\%(\\\\\|\\@\)*+
+      \ end=+@+me=e-1
+      \ nextgroup=sedReplacement64
+      \ contains=sedTab,sedRegexpMeta
+syn region sedReplacement64 contained
+      \ matchgroup=Delimiter
+      \ start=+@\%(\\\\\|\\@\)*+
+      \ end=+@+
+      \ nextgroup=sedFlag
+      \ contains=sedTab,sedReplaceMeta
 
 " Since the syntax for the substitution command is very similar to the
 " syntax for the transform command, I use the same pattern matching
@@ -110,8 +160,8 @@ if s:highlight_tabs
 endif
 let s:i = char2nr(" ") " ASCII: 32, EBCDIC: 64
 while s:i <= s:last
-  exe "hi def link sedRegexp".s:i      "Macro"
-  exe "hi def link sedReplacement".s:i "NONE"
+  exe "hi def link sedRegexp" .. s:i           "Macro"
+  exe "hi def link sedReplacement" .. s:i      "NONE"
   let s:i = s:i + 1
 endwhile
 
@@ -120,4 +170,4 @@ unlet s:highlight_tabs
 
 let b:current_syntax = "sed"
 
-" vim: nowrap sw=2 sts=2 ts=8 noet:
+" vim: nowrap sw=2 sts=2 ts=8 noet fdm=marker:
diff --git a/runtime/syntax/testdir/dumps/sed_00.dump b/runtime/syntax/testdir/dumps/sed_00.dump
new file mode 100644 (file)
index 0000000..99d59d2
--- /dev/null
@@ -0,0 +1,20 @@
+>#+0#0000e05#ffffff0| |s|e|d|(|1|)| +0#0000000&@66
+@75
+@75
+|#+0#0000e05&| |A|d@1|r|e|s@1|e|s| +0#0000000&@63
+@75
+@75
+|#+0#0000e05&| |l|i|n|e| |n|u|m|b|e|r| +0#0000000&@61
+@75
+|4+0#e000e06&|2|p+0#00e0e07&| +0#0000000&@71
+|$+0#e000e06&|p+0#00e0e07&| +0#0000000&@72
+@75
+|/+0#e000e06&|f|o@1|b|a|r|/|p+0#00e0e07&| +0#0000000&@65
+|/+0#e000e06&|f|o@1|[|/|]|b|a|r|/|p+0#00e0e07&| +0#0000000&@62
+|/+0#e000e06&|f|o@1|\|/|b|a|r|/|p+0#00e0e07&| +0#0000000&@63
+@75
+|\+0#e000e06&|x|f|o@1|b|a|r|x|p+0#00e0e07&| +0#0000000&@64
+|\+0#e000e06&|x|f|o@1|\|x|b|a|r|x|p+0#00e0e07&| +0#0000000&@62
+|\+0#e000e06&|x|f|o@1|[|x|]|b|a|r|x|p+0#00e0e07&| +0#0000000&@61
+@75
+@57|1|,|1| @10|T|o|p| 
diff --git a/runtime/syntax/testdir/dumps/sed_01.dump b/runtime/syntax/testdir/dumps/sed_01.dump
new file mode 100644 (file)
index 0000000..1d56483
--- /dev/null
@@ -0,0 +1,20 @@
+|/+0#e000e06#ffffff0|f|o@1|\|/|b|a|r|/|p+0#00e0e07&| +0#0000000&@63
+@75
+|\+0#e000e06&|x|f|o@1|b|a|r|x|p+0#00e0e07&| +0#0000000&@64
+|\+0#e000e06&|x|f|o@1|\|x|b|a|r|x|p+0#00e0e07&| +0#0000000&@62
+|\+0#e000e06&|x|f|o@1|[|x|]|b|a|r|x|p+0#00e0e07&| +0#0000000&@61
+> @74
+|#+0#0000e05&| |s|k|i|p| |b|r|a|c|k|e|t| |e|x|p|r|e|s@1|i|o|n|s| +0#0000000&@48
+|\+0#e000e06&|a|_|\|a|_|[|a|[|:|a|s|c|i@1|:|]|a|[|.|a|.|]|a|[|=|a|=|]|a|]|_|a|p+0#00e0e07&| +0#0000000&@40
+|\+0#e000e06&|a|_|\|a|_|[|^|a|[|:|a|s|c|i@1|:|]|a|[|.|a|.|]|a|[|=|a|=|]|a|]|_|a|p+0#00e0e07&| +0#0000000&@39
+|\+0#e000e06&|a|_|\|a|_|[|]|a|[|:|a|s|c|i@1|:|]|a|[|.|a|.|]|a|[|=|a|=|]|a|]|_|a|p+0#00e0e07&| +0#0000000&@39
+|\+0#e000e06&|a|_|\|a|_|[|^|]|a|[|:|a|s|c|i@1|:|]|a|[|.|a|.|]|a|[|=|a|=|]|a|]|_|a|p+0#00e0e07&| +0#0000000&@38
+@75
+@75
+|#+0#0000e05&| |r|a|n|g|e| +0#0000000&@67
+@75
+|4+0#e000e06&|2|,|8|4|p+0#00e0e07&| +0#0000000&@68
+|/+0#e000e06&|f|o@1|/|,|/|b|a|r|/|p+0#00e0e07&| +0#0000000&@62
+@75
+|/+0#e000e06&|f|o@1|/|,|4|2|p+0#00e0e07&| +0#0000000&@65
+@57|1|9|,|0|-|1| @7|2|8|%| 
diff --git a/runtime/syntax/testdir/dumps/sed_02.dump b/runtime/syntax/testdir/dumps/sed_02.dump
new file mode 100644 (file)
index 0000000..a3dc09f
--- /dev/null
@@ -0,0 +1,20 @@
+|/+0#e000e06#ffffff0|f|o@1|/|,|4|2|p+0#00e0e07&| +0#0000000&@65
+|4+0#e000e06&|2|,|/|b|a|r|/|p+0#00e0e07&| +0#0000000&@65
+@75
+@75
+|#+0#0000e05&| |G|N|U| |e|x|t|e|n|s|i|o|n|s| +0#0000000&@58
+> @74
+@75
+|#+0#0000e05&| |s|t|e|p| +0#0000000&@68
+@75
+|1+0#e000e06&|~|2|p+0#00e0e07&| +0#0000000&@70
+@75
+@75
+|#+0#0000e05&| |i|g|n|o|r|e| |c|a|s|e|,| |m|u|l|t|i|l|i|n|e| +0#0000000&@50
+@75
+|/+0#e000e06&|f|o@1|b|a|r|/|I|p+0#00e0e07&| +0#0000000&@64
+|/+0#e000e06&|f|o@1|b|a|r|/|M|p+0#00e0e07&| +0#0000000&@64
+|/+0#e000e06&|f|o@1|b|a|r|/|I|M|p+0#00e0e07&| +0#0000000&@63
+|/+0#e000e06&|f|o@1|b|a|r|/|M|I|p+0#00e0e07&| +0#0000000&@63
+@75
+@57|3|7|,|0|-|1| @7|6|7|%| 
diff --git a/runtime/syntax/testdir/dumps/sed_03.dump b/runtime/syntax/testdir/dumps/sed_03.dump
new file mode 100644 (file)
index 0000000..4652e9c
--- /dev/null
@@ -0,0 +1,20 @@
+| +0&#ffffff0@74
+|\+0#e000e06&|a|f|o@1|b|\|a|r|a|I|p+0#00e0e07&| +0#0000000&@62
+|\+0#e000e06&|a|f|o@1|b|\|a|r|a|M|p+0#00e0e07&| +0#0000000&@62
+|\+0#e000e06&|a|f|o@1|b|\|a|r|a|I|M|p+0#00e0e07&| +0#0000000&@61
+|\+0#e000e06&|a|f|o@1|b|\|a|r|a|M|I|p+0#00e0e07&| +0#0000000&@61
+> @74
+@75
+|#+0#0000e05&| |i|n|c|r|e|m|e|n|t| +0#0000000&@63
+@75
+|4+0#e000e06&|2|,|+|4|2|p+0#00e0e07&| +0#0000000&@67
+@75
+@75
+|#+0#0000e05&| |s|t|e|p| +0#0000000&@68
+@75
+|4+0#e000e06&|2|,|~|2|p+0#00e0e07&| +0#0000000&@68
+@75
+|~+0#4040ff13&| @73
+|~| @73
+|~| @73
+| +0#0000000&@56|5@1|,|0|-|1| @7|B|o|t| 
diff --git a/runtime/syntax/testdir/input/sed.sed b/runtime/syntax/testdir/input/sed.sed
new file mode 100644 (file)
index 0000000..a6f248e
--- /dev/null
@@ -0,0 +1,65 @@
+# sed(1)
+
+
+# Addresses
+
+
+# line number
+
+42p
+$p
+
+/foobar/p
+/foo[/]bar/p
+/foo\/bar/p
+
+\xfoobarxp
+\xfoo\xbarxp
+\xfoo[x]barxp
+
+# skip bracket expressions
+\a_\a_[a[:ascii:]a[.a.]a[=a=]a]_ap
+\a_\a_[^a[:ascii:]a[.a.]a[=a=]a]_ap
+\a_\a_[]a[:ascii:]a[.a.]a[=a=]a]_ap
+\a_\a_[^]a[:ascii:]a[.a.]a[=a=]a]_ap
+
+
+# range
+
+42,84p
+/foo/,/bar/p
+
+/foo/,42p
+42,/bar/p
+
+
+# GNU extensions
+
+
+# step
+
+1~2p
+
+
+# ignore case, multiline
+
+/foobar/Ip
+/foobar/Mp
+/foobar/IMp
+/foobar/MIp
+
+\afoob\araIp
+\afoob\araMp
+\afoob\araIMp
+\afoob\araMIp
+
+
+# increment
+
+42,+42p
+
+
+# step
+
+42,~2p
+