git.ipfire.org Git - thirdparty/vim.git/commitdiff
runtime(python2): highlight unicode strings in python2
author: Rob B <github@0x7e.net>
Mon, 14 Jul 2025 20:21:44 +0000 (22:21 +0200)
committer: Christian Brabandt <cb@256bit.org>
Mon, 14 Jul 2025 20:21:44 +0000 (22:21 +0200)
fixes: #14033
fixes: #17726
closes: #17729

Signed-off-by: Rob B <github@0x7e.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>
runtime/syntax/python2.vim
runtime/syntax/testdir/dumps/python2_strings_00.dump [new file with mode: 0644]
runtime/syntax/testdir/dumps/python2_strings_01.dump [new file with mode: 0644]
runtime/syntax/testdir/dumps/python2_strings_02.dump [new file with mode: 0644]
runtime/syntax/testdir/dumps/python2_strings_03.dump [new file with mode: 0644]
runtime/syntax/testdir/dumps/python2_strings_04.dump [new file with mode: 0644]
runtime/syntax/testdir/dumps/python2_strings_05.dump [new file with mode: 0644]
runtime/syntax/testdir/input/python2_strings.py [new file with mode: 0644]

index 3b30eabbae0d164ae5b0e4c5018399b0a3ad94c8..90c8dcaa338abfb894ef2b5a0e9bcd95a2cc2278 100644 (file)
@@ -2,8 +2,10 @@
 " Language:    Python 2
 " Maintainer:  Zvezdan Petkovic <zpetkovic@acm.org>
 " Last Change: 2016 Oct 29
+" 2025 Jul 14 by Vim project: highlight unicode strings
 " Credits:     Neil Schemenauer <nas@python.ca>
 "              Dmitry Vasiliev
+"              Rob B
 "
 "              This version is a major rewrite by Zvezdan Petkovic.
 "
@@ -141,24 +143,53 @@ syn keyword pythonTodo            FIXME NOTE NOTES TODO XXX contained
 
 " Triple-quoted strings can contain doctests.
 syn region  pythonString matchgroup=pythonQuotes
-      \ start=+[uU]\=\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1"
+      \ start=+\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1"
       \ contains=pythonEscape,@Spell
 syn region  pythonString matchgroup=pythonTripleQuotes
-      \ start=+[uU]\=\z('''\|"""\)+ end="\z1" keepend
+      \ start=+\z('''\|"""\)+ end="\z1" keepend
       \ contains=pythonEscape,pythonSpaceError,pythonDoctest,@Spell
 syn region  pythonRawString matchgroup=pythonQuotes
-      \ start=+[uU]\=[rR]\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1"
+      \ start=+[rR]\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1"
       \ contains=@Spell
 syn region  pythonRawString matchgroup=pythonTripleQuotes
-      \ start=+[uU]\=[rR]\z('''\|"""\)+ end="\z1" keepend
+      \ start=+[rR]\z('''\|"""\)+ end="\z1" keepend
       \ contains=pythonSpaceError,pythonDoctest,@Spell
 
+" Unicode strings
+syn region  pythonString
+      \ matchgroup=pythonQuotes
+      \ start=+[uU]\z(['"]\)+
+      \ end="\z1"
+      \ skip="\\\\\|\\\z1"
+      \ contains=pythonEscape,pythonUnicodeEscape,@Spell
+syn region  pythonString
+      \ matchgroup=pythonTripleQuotes
+      \ start=+[uU]\z('''\|"""\)+
+      \ end="\z1"
+      \ keepend
+      \ contains=pythonEscape,pythonUnicodeEscape,pythonSpaceError,pythonDoctest,@Spell
+
+" Raw Unicode strings recognize Unicode escape sequences
+" https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals
+syn region  pythonRawString
+      \ matchgroup=pythonQuotes
+      \ start=+[uU][rR]\z(['"]\)+
+      \ end="\z1"
+      \ skip="\\\\\|\\\z1"
+      \ contains=pythonUnicodeEscape,@Spell
+syn region  pythonRawString
+      \ matchgroup=pythonTripleQuotes
+      \ start=+[uU][rR]\z('''\|"""\)+
+      \ end="\z1"
+      \ keepend
+      \ contains=pythonUnicodeEscape,pythonSpaceError,pythonDoctest,@Spell
+
 syn match   pythonEscape       +\\[abfnrtv'"\\]+ contained
 syn match   pythonEscape       "\\\o\{1,3}" contained
 syn match   pythonEscape       "\\x\x\{2}" contained
-syn match   pythonEscape       "\%(\\u\x\{4}\|\\U\x\{8}\)" contained
+syn match   pythonUnicodeEscape        "\%(\\u\x\{4}\|\\U\x\{8}\)" contained
 " Python allows case-insensitive Unicode IDs: http://www.unicode.org/charts/
-syn match   pythonEscape       "\\N{\a\+\%(\s\a\+\)*}" contained
+syn match   pythonUnicodeEscape        "\\N{\a\+\%(\s\a\+\)*}" contained
 syn match   pythonEscape       "\\$"
 
 " It is very important to understand all details before changing the
@@ -320,6 +351,7 @@ hi def link pythonRawString         String
 hi def link pythonQuotes               String
 hi def link pythonTripleQuotes         pythonQuotes
 hi def link pythonEscape               Special
+hi def link pythonUnicodeEscape                pythonEscape
 if !exists("python_no_number_highlight")
   hi def link pythonNumber             Number
 endif
diff --git a/runtime/syntax/testdir/dumps/python2_strings_00.dump b/runtime/syntax/testdir/dumps/python2_strings_00.dump
new file mode 100644 (file)
index 0000000..ef770d3
--- /dev/null
@@ -0,0 +1,20 @@
+>#+0#0000e05#ffffff0| |S|t|r|i|n|g| |l|i|t|e|r|a|l|s| +0#0000000&@57
+|#+0#0000e05&| |h|t@1|p|s|:|/@1|d|o|c|s|.|p|y|t|h|o|n|.|o|r|g|/|2|/|r|e|f|e|r|e|n|c|e|/|l|e|x|i|c|a|l|_|a|n|a|l|y|s|i|s|.|h|t|m|l|#|s|t|r|i|n|g|-|l|i|t|e|r|a|l|s
+| +0#0000000&@74
+|#+0#0000e05&| |S|t|r|i|n|g|s|:| |S|o|u|r|c|e| |e|n|c|o|d|i|n|g|,| |n|o| |U|n|i|c|o|d|e| |e|s|c|a|p|e| |s|e|q|u|e|n|c|e|s| +0#0000000&@19
+|t|e|s|t| |=| |'+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|'| +0#e000002&|a|n|d| |\+0#e000e06&|"| +0#e000002&|a|n|d| |\+0#e000e06&|t|'+0#e000002&| +0#0000000&@29
+|t|e|s|t| |=| |"+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1|"+0#e000002&| +0#0000000&@32
+|t|e|s|t| |=| |'+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|A|M|A|T|I
+|O|N| |M|A|R|K|}|'| +0#0000000&@65
+|t|e|s|t| |=| |"+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|d| |\+0#e000e06&@1| +0#e000002&|b|a|c|k|s|l|a|s|h| |a|n|d| |i|g|n|o|r|e|d| |\+0#e000e06&| +0#0000000&@20
+|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66
+|t|e|s|t| |=| |'+0#e000002&@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@38
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|'@2| +0#0000000&@38
+|t|e|s|t| |=| |"+0#e000002&@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@38
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|"@2| +0#0000000&@38
+@75
+|#+0#0000e05&| |R|a|w| |s|t|r|i|n|g|s| +0#0000000&@61
+|t|e|s|t| |=| |r+0#e000002&|'|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|'| |a|n|d| |\|"| |a|n|d| |\|t|'| +0#0000000&@24
+@57|1|,|1| @10|T|o|p| 
diff --git a/runtime/syntax/testdir/dumps/python2_strings_01.dump b/runtime/syntax/testdir/dumps/python2_strings_01.dump
new file mode 100644 (file)
index 0000000..8d0a6a5
--- /dev/null
@@ -0,0 +1,20 @@
+|t+0&#ffffff0|e|s|t| |=| |"+0#e000002&@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@38
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|"@2| +0#0000000&@38
+@75
+|#+0#0000e05&| |R|a|w| |s|t|r|i|n|g|s| +0#0000000&@61
+>t|e|s|t| |=| |r+0#e000002&|'|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|'| |a|n|d| |\|"| |a|n|d| |\|t|'| +0#0000000&@24
+|t|e|s|t| |=| |R+0#e000002&|"|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|0|4|0| |a|n|d| |\|x|F@1|"| +0#0000000&@27
+|t|e|s|t| |=| |r+0#e000002&|'|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L
+|A|M|A|T|I|O|N| |M|A|R|K|}|'| +0#0000000&@60
+|t|e|s|t| |=| |R+0#e000002&|"|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\@1| |b|a|c|k|s|l|a|s|h|e|s| |a|n|d| |l|i|t|e|r|a|l| |\| +0#0000000&@13
+|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66
+|t|e|s|t| |=| |r+0#e000002&|'@2|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@33
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|'@2| +0#0000000&@38
+|t|e|s|t| |=| |R+0#e000002&|"@2|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@33
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|"@2| +0#0000000&@38
+@75
+|#+0#0000e05&| |B|-|s|t|r|i|n|g|s|:| |P|r|e|f|i|x| |i|s| |a|l@1|o|w|e|d| |b|u|t| |i|g|n|o|r|e|d| |(|h|t@1|p|s|:|/@1|p|e|p|s|.|p|y|t|h|o|n|.|o|r|g|/|p|e|p|-|@+0#4040ff13&@2
+| +0#0000000&@56|1|8|,|1| @9|1|8|%| 
diff --git a/runtime/syntax/testdir/dumps/python2_strings_02.dump b/runtime/syntax/testdir/dumps/python2_strings_02.dump
new file mode 100644 (file)
index 0000000..2ad1938
--- /dev/null
@@ -0,0 +1,20 @@
+|#+0#0000e05#ffffff0| |B|-|s|t|r|i|n|g|s|:| |P|r|e|f|i|x| |i|s| |a|l@1|o|w|e|d| |b|u|t| |i|g|n|o|r|e|d| |(|h|t@1|p|s|:|/@1|p|e|p|s|.|p|y|t|h|o|n|.|o|r|g|/|p|e|p|-|3|1@1
+|2|)| +0#0000000&@72
+|t|e|s|t| |=| |b|'+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|'| +0#e000002&|a|n|d| |\+0#e000e06&|"| +0#e000002&|a|n|d| |\+0#e000e06&|t|'+0#e000002&| +0#0000000&@28
+|t|e|s|t| |=| |B|"+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1|"+0#e000002&| +0#0000000&@31
+|t|e|s|t| |=| |b|'+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|A|M|A|T
+|I|O|N| |M|A|R|K|}|'| +0#0000000&@64
+>t|e|s|t| |=| |B|"+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|d| |\+0#e000e06&@1| +0#e000002&|b|a|c|k|s|l|a|s|h| |a|n|d| |i|g|n|o|r|e|d| |\+0#e000e06&| +0#0000000&@19
+|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66
+|t|e|s|t| |=| |b|'+0#e000002&@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@37
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|'@2| +0#0000000&@38
+|t|e|s|t| |=| |B|"+0#e000002&@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@37
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|"@2| +0#0000000&@38
+@75
+|#+0#0000e05&| |R|a|w| |b|-|s|t|r|i|n|g|s| +0#0000000&@59
+|t|e|s|t| |=| |b|r+0#e000002&|'|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|'| |a|n|d| |\|"| |a|n|d| |\|t|'| +0#0000000&@23
+|t|e|s|t| |=| |b|R+0#e000002&|"|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|0|4|0| |a|n|d| |\|x|F@1|"| +0#0000000&@26
+|t|e|s|t| |=| |B|r+0#e000002&|'|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |@+0#4040ff13&@2
+| +0#0000000&@56|3|4|,|1| @9|4|3|%| 
diff --git a/runtime/syntax/testdir/dumps/python2_strings_03.dump b/runtime/syntax/testdir/dumps/python2_strings_03.dump
new file mode 100644 (file)
index 0000000..749866e
--- /dev/null
@@ -0,0 +1,20 @@
+|t+0&#ffffff0|e|s|t| |=| |B|r+0#e000002&|'|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C
+|L|A|M|A|T|I|O|N| |M|A|R|K|}|'| +0#0000000&@59
+|t|e|s|t| |=| |B|R+0#e000002&|"|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\@1| |b|a|c|k|s|l|a|s|h|e|s| |a|n|d| |l|i|t|e|r|a|l| |\| +0#0000000&@12
+|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66
+|t|e|s|t| |=| |b|r+0#e000002&|'@2|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@32
+>a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|'@2| +0#0000000&@38
+|t|e|s|t| |=| |B|R+0#e000002&|"@2|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@32
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|"@2| +0#0000000&@38
+@75
+|#+0#0000e05&| |U|n|i|c|o|d|e| |s|t|r|i|n|g|s| +0#0000000&@57
+|t|e|s|t| |=| |u+0#e000002&|'|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|'| +0#e000002&|a|n|d| |\+0#e000e06&|"| +0#e000002&|a|n|d| |\+0#e000e06&|t|'+0#e000002&| +0#0000000&@28
+|t|e|s|t| |=| |U+0#e000002&|"|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1|"+0#e000002&| +0#0000000&@31
+|t|e|s|t| |=| |u+0#e000002&|'|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5| +0#e000002&|a|n|d| |\+0#e000e06&|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|A|M|A|T
+|I|O|N| |M|A|R|K|}|'+0#e000002&| +0#0000000&@64
+|t|e|s|t| |=| |U+0#e000002&|"|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|d| |\+0#e000e06&@1| +0#e000002&|b|a|c|k|s|l|a|s|h| |a|n|d| |i|g|n|o|r|e|d| |\+0#e000e06&| +0#0000000&@19
+|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66
+|t|e|s|t| |=| |u+0#e000002&|'@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@37
+@57|5|0|,|1| @9|6|9|%| 
diff --git a/runtime/syntax/testdir/dumps/python2_strings_04.dump b/runtime/syntax/testdir/dumps/python2_strings_04.dump
new file mode 100644 (file)
index 0000000..c3d8b31
--- /dev/null
@@ -0,0 +1,20 @@
+|t+0&#ffffff0|e|s|t| |=| |u+0#e000002&|'@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@37
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|'+0#e000002&@2| +0#0000000&@38
+|t|e|s|t| |=| |U+0#e000002&|"@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@37
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42
+>a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|"+0#e000002&@2| +0#0000000&@38
+@75
+|#+0#0000e05&| |R|a|w| |U|n|i|c|o|d|e| |s|t|r|i|n|g|s|:| |O|n|l|y| |U|n|i|c|o|d|e| |e|s|c|a|p|e| |s|e|q|u|e|n|c|e|s| +0#0000000&@22
+|t|e|s|t| |=| |u+0#e000002&|r|'|R|a|w| |U|n|i|c|o|d|e| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|'| |a|n|d| |\|"| |a|n|d| |\|t|'| +0#0000000&@15
+|t|e|s|t| |=| |u+0#e000002&|R|"|R|a|w| |U|n|i|c|o|d|e| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|0|4|0| |a|n|d| |\|x|F@1|"| +0#0000000&@18
+|t|e|s|t| |=| |U+0#e000002&|r|'|R|a|w| |U|n|i|c|o|d|e| |s|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5| +0#e000002&|a|n|d| |\+0#e000e06&|N|{|I|N|V|E
+|R|T|E|D| |E|X|C|L|A|M|A|T|I|O|N| |M|A|R|K|}|'+0#e000002&| +0#0000000&@51
+|t|e|s|t| |=| |U+0#e000002&|R|"|R|a|w| |U|n|i|c|o|d|e| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\@1| |b|a|c|k|s|l|a|s|h|e|s| |a|n|d| |l|i|t|e|r|a|l| |\| +0#0000000&@4
+|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66
+|t|e|s|t| |=| |u+0#e000002&|r|'@2|R|a|w| |U|n|i|c|o|d|e| |s|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@24
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|'+0#e000002&@2| +0#0000000&@38
+|t|e|s|t| |=| |U+0#e000002&|R|"@2|R|a|w| |U|n|i|c|o|d|e| |s|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@24
+|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42
+@57|6|7|,|1| @9|9|5|%| 
diff --git a/runtime/syntax/testdir/dumps/python2_strings_05.dump b/runtime/syntax/testdir/dumps/python2_strings_05.dump
new file mode 100644 (file)
index 0000000..0ed41d8
--- /dev/null
@@ -0,0 +1,20 @@
+|a+0#e000002#ffffff0|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42
+|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|"+0#e000002&@2| +0#0000000&@38
+@75
+>#+0#0000e05&| |v|i|m|:| |s|y|n|t|a|x|=|p|y|t|h|o|n|2| +0#0000000&@53
+|~+0#4040ff13&| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+|~| @73
+| +0#0000000&@56|8|2|,|1| @9|B|o|t| 
diff --git a/runtime/syntax/testdir/input/python2_strings.py b/runtime/syntax/testdir/input/python2_strings.py
new file mode 100644 (file)
index 0000000..a5625b7
--- /dev/null
@@ -0,0 +1,82 @@
+# String literals
+# https://docs.python.org/2/reference/lexical_analysis.html#string-literals
+
+# Strings: Source encoding, no Unicode escape sequences
+test = 'String with escapes \' and \" and \t'
+test = "String with escapes \040 and \xFF"
+test = 'String with literal \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}'
+test = "String with escaped \\ backslash and ignored \
+newline"
+test = '''String with quotes ' and "
+and escapes \t and \040 and \xFF
+and literal \u00A1 and \U00010605'''
+test = """String with quotes ' and "
+and escapes \t and \040 and \xFF
+and literal \u00A1 and \U00010605"""
+
+# Raw strings
+test = r'Raw string with literal \' and \" and \t'
+test = R"Raw string with literal \040 and \xFF"
+test = r'Raw string with literal \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}'
+test = R"Raw string with literal \\ backslashes and literal \
+newline"
+test = r'''Raw string with quotes ' and "
+and literal \t and \040 and \xFF
+and literal \u00A1 and \U00010605'''
+test = R"""Raw string with quotes ' and "
+and literal \t and \040 and \xFF
+and literal \u00A1 and \U00010605"""
+
+# B-strings: Prefix is allowed but ignored (https://peps.python.org/pep-3112)
+test = b'String with escapes \' and \" and \t'
+test = B"String with escapes \040 and \xFF"
+test = b'String with literal \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}'
+test = B"String with escaped \\ backslash and ignored \
+newline"
+test = b'''String with quotes ' and "
+and escapes \t and \040 and \xFF
+and literal \u00A1 and \U00010605'''
+test = B"""String with quotes ' and "
+and escapes \t and \040 and \xFF
+and literal \u00A1 and \U00010605"""
+
+# Raw b-strings
+test = br'Raw string with literal \' and \" and \t'
+test = bR"Raw string with literal \040 and \xFF"
+test = Br'Raw string with literal \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}'
+test = BR"Raw string with literal \\ backslashes and literal \
+newline"
+test = br'''Raw string with quotes ' and "
+and literal \t and \040 and \xFF
+and literal \u00A1 and \U00010605'''
+test = BR"""Raw string with quotes ' and "
+and literal \t and \040 and \xFF
+and literal \u00A1 and \U00010605"""
+
+# Unicode strings
+test = u'String with escapes \' and \" and \t'
+test = U"String with escapes \040 and \xFF"
+test = u'String with escapes \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}'
+test = U"String with escaped \\ backslash and ignored \
+newline"
+test = u'''String with quotes ' and "
+and escapes \t and \040 and \xFF
+and escapes \u00A1 and \U00010605'''
+test = U"""String with quotes ' and "
+and escapes \t and \040 and \xFF
+and escapes \u00A1 and \U00010605"""
+
+# Raw Unicode strings: Only Unicode escape sequences
+test = ur'Raw Unicode string with literal \' and \" and \t'
+test = uR"Raw Unicode string with literal \040 and \xFF"
+test = Ur'Raw Unicode string with escapes \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}'
+test = UR"Raw Unicode string with literal \\ backslashes and literal \
+newline"
+test = ur'''Raw Unicode string with quotes ' and "
+and literal \t and \040 and \xFF
+and escapes \u00A1 and \U00010605'''
+test = UR"""Raw Unicode string with quotes ' and "
+and literal \t and \040 and \xFF
+and escapes \u00A1 and \U00010605"""
+
+# vim: syntax=python2