From: Bruno Haible <bruno@clisp.org>
Date: Tue, 5 Dec 2023 13:15:42 +0000 (+0100)
Subject: xgettext: Python: Add support for f-strings.
X-Git-Tag: v0.23~283
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=994c5188cbf9f5b9fa9b6af8fba95d9d102f5140;p=thirdparty%2Fgettext.git

xgettext: Python: Add support for f-strings.

Reported by Ben <gnu@mail.qoben.com>
at <https://savannah.gnu.org/bugs/?61596>.

* gettext-tools/src/x-python.c (P7_498_START_OF_EXPRESSION): New macro.
(token_type_498, token_type_l498, token_type_m498, token_type_r498): New enum
items.
(free_token): Treat token_type_498 like token_type_string.
(phase7_getuc): Accept an f_string parameter. Use it to combine {{ to { and
}} to }.
(f_string_depth): New variable.
(struct f_string_level): New type.
(f_string_stack, f_string_stack_alloc): New variables.
(new_f_string_level): New function.
(open_pb): Renamed from open_pbb.
(phase5_get): Consider also the f_string_stack. Accept f"...", fr"...", rf"..."
syntax. Recognize tokens of type token_type_498, token_type_l498,
token_type_m498, token_type_r498.
(x_python_lex): Treat token_type_498 like token_type_string.
(extract_balanced): Handle the new token types.
(extract_python): Initialize f_string_depth and the f_string_stack.
* gettext-tools/tests/xgettext-python-1: Test also the backslash handling in
f-strings.
* gettext-tools/tests/xgettext-python-8: New file.
* gettext-tools/tests/Makefile.am (TESTS): Add it.
* gettext-tools/doc/lang-python.texi (Python): Explain the limitations of
f-string support.
* NEWS: Mention the change.
---

diff --git a/NEWS b/NEWS
index f9e7ae073..a99ecc85f 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,7 @@
 Version 0.23 - December 2023
 
 * Programming languages support:
+  - Python: xgettext now recognizes the f-string syntax.
   - Vala: Improved recognition of format strings when the string.printf method
     is used.
   - Glade: xgettext has improved support for GtkBuilder 4.
diff --git a/gettext-tools/doc/lang-python.texi b/gettext-tools/doc/lang-python.texi
index f2226b734..08e39171a 100644
--- a/gettext-tools/doc/lang-python.texi
+++ b/gettext-tools/doc/lang-python.texi
@@ -1,5 +1,5 @@
 @c This file is part of the GNU gettext manual.
-@c Copyright (C) 1995-2021 Free Software Foundation, Inc.
+@c Copyright (C) 1995-2023 Free Software Foundation, Inc.
 @c See the file gettext.texi for copying conditions.
 
 @node Python
@@ -92,3 +92,20 @@ individual arguments from format strings like this is only possible with
 the named argument syntax.  (With unnamed arguments, Python -- unlike C --
 verifies that the format string uses all supplied arguments.)
 @end itemize
+
+A note about f-strings (PEP 498): @code{xgettext}
+@itemize @bullet
+@item
+syntactically recognizes f-strings,
+@item
+is able to extract f-strings that contain no sub-expressions.
+@end itemize
+@noindent
+However, @code{xgettext} does not extract f-strings marked for translation
+that contain sub-expressions.  This will not work as expected:
+@smallexample
+_(f"The file @{file[i]@} does not exist.")
+@end smallexample
+@noindent
+because the translator is generally not a programmer and should thus not be
+confronted with expressions from the programming language.
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c
index 3873246de..581b5549b 100644
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -67,7 +67,9 @@
 /* The Python syntax is defined in the Python Reference Manual
    /usr/share/doc/packages/python/html/ref/index.html.
    See also Python-2.0/Parser/tokenizer.c, Python-2.0/Python/compile.c,
-   Python-2.0/Objects/unicodeobject.c.  */
+   Python-2.0/Objects/unicodeobject.c.
+   For the f-strings, refer to https://peps.python.org/pep-0498/
+   and https://docs.python.org/3/reference/lexical_analysis.html#literals .  */
 
 
 /* ====================== Keyword set customization.  ====================== */
@@ -775,6 +777,7 @@ phase3_ungetc (int c)
 /* Return value of phase7_getuc when EOF is reached.  */
 #define P7_EOF (-1)
 #define P7_STRING_END (-2)
+#define P7_498_START_OF_EXPRESSION (-3) /* { */
 
 /* Convert an UTF-16 or UTF-32 code point to a return value that can be
    distinguished from a single-byte return value.  */
@@ -801,6 +804,10 @@ enum token_type_ty
   token_type_lbracket,          /* [ */
   token_type_rbracket,          /* ] */
   token_type_string,            /* "abc", 'abc', """abc""", '''abc''' */
+  token_type_498,               /* f"abc", f'abc', f"""abc""", f'''abc''' */
+  token_type_l498,              /* left part of f-string: f"abc{, f'abc{, f"""abc{, f'''abc{ */
+  token_type_m498,              /* middle part of f-string: }abc{ */
+  token_type_r498,              /* right part of f-string: }abc", }abc', }abc""", }abc''' */
   token_type_symbol,            /* symbol, number */
   token_type_plus,              /* + */
   token_type_other              /* misc. operator */
@@ -811,9 +818,9 @@ typedef struct token_ty token_ty;
 struct token_ty
 {
   token_type_ty type;
-  char *string;                         /* for token_type_symbol */
-  mixed_string_ty *mixed_string;        /* for token_type_string */
-  refcounted_string_list_ty *comment;   /* for token_type_string */
+  char *string;                       /* for token_type_symbol */
+  mixed_string_ty *mixed_string;      /* for token_type_string, token_type_498 */
+  refcounted_string_list_ty *comment; /* for token_type_string, token_type_498 */
   int line_number;
 };
 
@@ -823,7 +830,7 @@ free_token (token_ty *tp)
 {
   if (tp->type == token_type_symbol)
     free (tp->string);
-  if (tp->type == token_type_string)
+  if (tp->type == token_type_string || tp->type == token_type_498)
     {
       mixed_string_free (tp->mixed_string);
       drop_reference (tp->comment);
@@ -847,6 +854,7 @@ free_token (token_ty *tp)
 static int
 phase7_getuc (int quote_char,
               bool triple, bool interpret_ansic, bool interpret_unicode,
+              bool f_string,
               unsigned int *backslash_counter)
 {
   int c;
@@ -900,6 +908,25 @@ phase7_getuc (int quote_char,
           return P7_STRING_END;
         }
 
+      if (f_string)
+        {
+          if (c == '{')
+            {
+              int c1 = phase2_getc ();
+              if (c1 == '{')
+                return UNICODE ('{');
+              phase2_ungetc (c1);
+              return P7_498_START_OF_EXPRESSION;
+            }
+          if (c == '}')
+            {
+              int c1 = phase2_getc ();
+              if (c1 == '}')
+                return UNICODE ('}');
+              phase2_ungetc (c1);
+            }
+        }
+
       if (c != '\\')
         {
           *backslash_counter = 0;
@@ -1168,8 +1195,49 @@ phase7_getuc (int quote_char,
 /* Combine characters into tokens.  Discard whitespace except newlines at
    the end of logical lines.  */
 
-/* Number of pending open parentheses/braces/brackets.  */
-static int open_pbb;
+/* Number of open f-strings f"...{ or f'...{ or f"""...{ or f'''...{ or
+   fr"...{ or fr'...{ or fr"""...{ or fr'''...{ */
+static int f_string_depth;
+
+/* Information per f-string nesting level.  */
+struct f_string_level
+{
+  /* Describes the start and end sequence of the f-string.
+     Only relevant for levels > 0.  */
+  int quote_char;
+  bool interpret_ansic;
+  bool triple;
+  /* Number of open '{' tokens.  */
+  int brace_depth;
+};
+
+/* Stack of f-string nesting levels.
+   The "current" element is f_string_stack[f_string_depth].  */
+static struct f_string_level *f_string_stack;
+/* Number of allocated elements in f_string_stack.  */
+static size_t f_string_stack_alloc;
+
+/* Adds a new f_string_stack level after f_string_depth was incremented.  */
+static void
+new_f_string_level (int quote_char, bool interpret_ansic, bool triple)
+{
+  if (f_string_depth == f_string_stack_alloc)
+    {
+      f_string_stack_alloc = 2 * f_string_stack_alloc + 1;
+      /* Now f_string_depth < f_string_stack_alloc.  */
+      f_string_stack =
+        (struct f_string_level *)
+        xrealloc (f_string_stack,
+                  f_string_stack_alloc * sizeof (struct f_string_level));
+    }
+  f_string_stack[f_string_depth].quote_char = quote_char;
+  f_string_stack[f_string_depth].interpret_ansic = interpret_ansic;
+  f_string_stack[f_string_depth].triple = triple;
+  f_string_stack[f_string_depth].brace_depth = 0;
+}
+
+/* Number of pending open parentheses/brackets.  */
+static int open_pb;
 
 static token_ty phase5_pushback[2];
 static int phase5_pushback_length;
@@ -1207,7 +1275,7 @@ phase5_get (token_ty *tp)
             savable_comment_reset ();
           /* Ignore newline if and only if it is used for implicit line
              joining.  */
-          if (open_pbb > 0)
+          if (open_pb > 0 || f_string_stack[f_string_depth].brace_depth > 0)
             continue;
           tp->type = token_type_other;
           return;
@@ -1229,13 +1297,13 @@ phase5_get (token_ty *tp)
               }
           }
           FALLTHROUGH;
-        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        case 'A': case 'B': case 'C': case 'D': case 'E':
         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
         case 'M': case 'N': case 'O': case 'P': case 'Q':
         case 'S': case 'T':           case 'V': case 'W': case 'X':
         case 'Y': case 'Z':
         case '_':
-        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+        case 'a': case 'b': case 'c': case 'd': case 'e':
         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
         case 'm': case 'n': case 'o': case 'p': case 'q':
         case 's': case 't':           case 'v': case 'w': case 'x':
@@ -1297,9 +1365,38 @@ phase5_get (token_ty *tp)
             int quote_char;
             bool interpret_ansic;
             bool interpret_unicode;
+            bool f_string;
             bool triple;
             unsigned int backslash_counter;
 
+            case 'F': case 'f':
+              {
+                int c1 = phase2_getc ();
+                if (c1 == '"' || c1 == '\'')
+                  {
+                    quote_char = c1;
+                    interpret_ansic = true;
+                    interpret_unicode = false;
+                    f_string = true;
+                    goto string;
+                  }
+                if (c1 == 'R' || c1 == 'r')
+                  {
+                    int c2 = phase2_getc ();
+                    if (c2 == '"' || c2 == '\'')
+                      {
+                        quote_char = c2;
+                        interpret_ansic = false;
+                        interpret_unicode = false;
+                        f_string = true;
+                        goto string;
+                      }
+                    phase2_ungetc (c2);
+                  }
+                phase2_ungetc (c1);
+                goto symbol;
+              }
+
             case 'R': case 'r':
               {
                 int c1 = phase2_getc ();
@@ -1308,8 +1405,22 @@ phase5_get (token_ty *tp)
                     quote_char = c1;
                     interpret_ansic = false;
                     interpret_unicode = false;
+                    f_string = false;
                     goto string;
                   }
+                if (c1 == 'F' || c1 == 'f')
+                  {
+                    int c2 = phase2_getc ();
+                    if (c2 == '"' || c2 == '\'')
+                      {
+                        quote_char = c2;
+                        interpret_ansic = false;
+                        interpret_unicode = false;
+                        f_string = true;
+                        goto string;
+                      }
+                    phase2_ungetc (c2);
+                  }
                 phase2_ungetc (c1);
                 goto symbol;
               }
@@ -1322,6 +1433,7 @@ phase5_get (token_ty *tp)
                     quote_char = c1;
                     interpret_ansic = true;
                     interpret_unicode = true;
+                    f_string = false;
                     goto string;
                   }
                 if (c1 == 'R' || c1 == 'r')
@@ -1332,6 +1444,7 @@ phase5_get (token_ty *tp)
                         quote_char = c2;
                         interpret_ansic = false;
                         interpret_unicode = true;
+                        f_string = false;
                         goto string;
                       }
                     phase2_ungetc (c2);
@@ -1344,6 +1457,7 @@ phase5_get (token_ty *tp)
               quote_char = c;
               interpret_ansic = true;
               interpret_unicode = false;
+              f_string = false;
             string:
               triple = false;
               lexical_context = lc_string;
@@ -1373,13 +1487,28 @@ phase5_get (token_ty *tp)
                 for (;;)
                   {
                     int uc = phase7_getuc (quote_char, triple, interpret_ansic,
-                                           interpret_unicode, &backslash_counter);
+                                           interpret_unicode, f_string,
+                                           &backslash_counter);
 
                     /* Keep line_number in sync.  */
                     msb.line_number = line_number;
 
                     if (uc == P7_EOF || uc == P7_STRING_END)
-                      break;
+                      {
+                        tp->mixed_string = mixed_string_buffer_result (&msb);
+                        tp->comment = add_reference (savable_comment);
+                        tp->type = (f_string ? token_type_498 : token_type_string);
+                        break;
+                      }
+
+                    if (uc == P7_498_START_OF_EXPRESSION) /* implies f_string */
+                      {
+                        mixed_string_buffer_destroy (&msb);
+                        tp->type = token_type_l498;
+                        f_string_depth++;
+                        new_f_string_level (quote_char, interpret_ansic, triple);
+                        break;
+                      }
 
                     if (IS_UNICODE (uc))
                       {
@@ -1391,22 +1520,58 @@ phase5_get (token_ty *tp)
                     else
                       mixed_string_buffer_append_char (&msb, uc);
                   }
-                tp->mixed_string = mixed_string_buffer_result (&msb);
-                tp->comment = add_reference (savable_comment);
                 lexical_context = lc_outside;
-                tp->type = token_type_string;
               }
               return;
           }
 
+        case '{':
+          f_string_stack[f_string_depth].brace_depth++;
+          tp->type = token_type_other;
+          return;
+
+        case '}':
+          if (f_string_stack[f_string_depth].brace_depth > 0)
+            f_string_stack[f_string_depth].brace_depth--;
+          else if (f_string_depth > 0)
+            {
+              /* Middle or right part of f-string.  */
+              int quote_char = f_string_stack[f_string_depth].quote_char;
+              bool interpret_ansic = f_string_stack[f_string_depth].interpret_ansic;
+              bool triple = f_string_stack[f_string_depth].triple;
+              unsigned int backslash_counter = 0;
+              for (;;)
+                {
+                  int uc = phase7_getuc (quote_char, triple, interpret_ansic,
+                                         false, true,
+                                         &backslash_counter);
+
+                  if (uc == P7_EOF || uc == P7_STRING_END)
+                    {
+                      tp->type = token_type_r498;
+                      f_string_depth--;
+                      break;
+                    }
+
+                  if (uc == P7_498_START_OF_EXPRESSION)
+                    {
+                      tp->type = token_type_m498;
+                      break;
+                    }
+                }
+              return;
+            }
+          tp->type = token_type_other;
+          return;
+
         case '(':
-          open_pbb++;
+          open_pb++;
           tp->type = token_type_lparen;
           return;
 
         case ')':
-          if (open_pbb > 0)
-            open_pbb--;
+          if (open_pb > 0)
+            open_pb--;
           tp->type = token_type_rparen;
           return;
 
@@ -1414,15 +1579,15 @@ phase5_get (token_ty *tp)
           tp->type = token_type_comma;
           return;
 
-        case '[': case '{':
-          open_pbb++;
-          tp->type = (c == '[' ? token_type_lbracket : token_type_other);
+        case '[':
+          open_pb++;
+          tp->type = token_type_lbracket;
           return;
 
-        case ']': case '}':
-          if (open_pbb > 0)
-            open_pbb--;
-          tp->type = (c == ']' ? token_type_rbracket : token_type_other);
+        case ']':
+          if (open_pb > 0)
+            open_pb--;
+          tp->type = token_type_rbracket;
           return;
 
         case '+':
@@ -1460,7 +1625,7 @@ static void
 x_python_lex (token_ty *tp)
 {
   phase5_get (tp);
-  if (tp->type == token_type_string)
+  if (tp->type == token_type_string || tp->type == token_type_498)
     {
       mixed_string_ty *sum = tp->mixed_string;
 
@@ -1476,7 +1641,8 @@ x_python_lex (token_ty *tp)
             case token_type_plus:
               {
                 phase5_get (&token3);
-                if (token3.type == token_type_string)
+                if (token3.type == token_type_string
+                    || token3.type == token_type_498)
                   {
                     free_token (&token2);
                     tp2 = &token3;
@@ -1486,6 +1652,7 @@ x_python_lex (token_ty *tp)
               }
               break;
             case token_type_string:
+            case token_type_498:
               tp2 = &token2;
               break;
             default:
@@ -1667,6 +1834,7 @@ extract_balanced (message_list_ty *mlp,
           continue;
 
         case token_type_string:
+        case token_type_498:
           {
             lex_pos_ty pos;
 
@@ -1697,6 +1865,9 @@ extract_balanced (message_list_ty *mlp,
           unref_region (inner_region);
           return true;
 
+        case token_type_l498:
+        case token_type_m498:
+        case token_type_r498:
         case token_type_plus:
         case token_type_other:
           next_context_iter = null_context_list_iterator;
@@ -1748,10 +1919,13 @@ extract_python (FILE *f,
 
   continuation_or_nonblank_line = false;
 
-  open_pbb = 0;
+  open_pb = 0;
 
   phase5_pushback_length = 0;
 
+  f_string_depth = 0;
+  new_f_string_level (0, false, false);
+
   flag_context_list_table = flag_table;
   paren_nesting_depth = 0;
   bracket_nesting_depth = 0;
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am
index 4e432e63a..9e37ff142 100644
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -146,7 +146,7 @@ TESTS = gettext-1 gettext-2 \
 	xgettext-rst-1 xgettext-rst-2 \
 	xgettext-python-1 xgettext-python-2 xgettext-python-3 \
 	xgettext-python-4 xgettext-python-5 xgettext-python-6 \
-	xgettext-python-7 \
+	xgettext-python-7 xgettext-python-8 \
 	xgettext-python-stackovfl-1 xgettext-python-stackovfl-2 \
 	xgettext-python-stackovfl-3 xgettext-python-stackovfl-4 \
 	xgettext-ruby-1 \
diff --git a/gettext-tools/tests/xgettext-python-1 b/gettext-tools/tests/xgettext-python-1
index 9695abda4..ec7f92d9a 100755
--- a/gettext-tools/tests/xgettext-python-1
+++ b/gettext-tools/tests/xgettext-python-1
@@ -4,22 +4,30 @@
 # Test of Python support.
 
 cat <<\EOF > xg-py-1.py
-# interpret_ansic = true, interpret_unicode = false
+# interpret_ansic = true, interpret_unicode = false, f_string = false
 _("abc\
 \\def\'ghi\"jkl\a\b\f\n\r\t\v x\040x\x7ey\u0142\U00010123\N{LATIN SMALL LETTER Z}");
 
-# interpret_ansic = false, interpret_unicode = false
+# interpret_ansic = false, interpret_unicode = false, f_string = false
 _(r"abc\
 \\def\'ghi\"jkl\a\b\f\n\r\t\v x\040x\x7ey\u0142\U00010123\N{LATIN SMALL LETTER Z}");
 
-# interpret_ansic = true, interpret_unicode = true
+# interpret_ansic = true, interpret_unicode = true, f_string = false
 _(u"abc\
 \\def\'ghi\"jkl\a\b\f\n\r\t\v x\040x\x7ey\u0142\U00010123\N{LATIN SMALL LETTER Z}");
 
-# interpret_ansic = false, interpret_unicode = true
+# interpret_ansic = false, interpret_unicode = true, f_string = false
 _(ur"abc\
 \\def\'ghi\"jkl\a\b\f\n\r\t\v x\040x\x7ey\u0142\U00010123\N{LATIN SMALL LETTER Z}");
 
+# interpret_ansic = true, interpret_unicode = false, f_string = true
+_(f"abc\
+\\def\'ghi\"jkl{{m\{{n\\{{o\\\{{p\\\\{{q\\\\\{{r}}s\}}t\\}}u\\\}}v\\\\}}w\\\\\}}x\a\b\f\n\r\t\v x\040x\x7ey\u0142\U00010123");
+
+# interpret_ansic = false, interpret_unicode = false, f_string = true
+_(fr"Abc\
+\\def\'ghi\"jkl{{m\{{n\\{{o\\\{{p\\\\{{q\\\\\{{r}}s\}}t\\}}u\\\}}v\\\\}}w\\\\\}}x\a\b\f\n\r\t\v x\040x\x7ey\u0142\U00010123");
+
 # This will not be extracted.
 _(CATEGORIES["default"]["name"]);
 
@@ -52,32 +60,47 @@ msgstr ""
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
 
-#. interpret_ansic = true, interpret_unicode = false
+#. interpret_ansic = true, interpret_unicode = false, f_string = false
 msgid ""
 "abc\\def'ghi\"jkl\a\b\f\n"
 "\r\t\v x x~y\\u0142\\U00010123\\N{LATIN SMALL LETTER Z}"
 msgstr ""
 
-#. interpret_ansic = false, interpret_unicode = false
+#. interpret_ansic = false, interpret_unicode = false, f_string = false
 msgid ""
 "abc\\\n"
 "\\\\def\\'ghi\\\"jkl\\a\\b\\f\\n\\r\\t\\v "
 "x\\040x\\x7ey\\u0142\\U00010123\\N{LATIN SMALL LETTER Z}"
 msgstr ""
 
-#. interpret_ansic = true, interpret_unicode = true
+#. interpret_ansic = true, interpret_unicode = true, f_string = false
 msgid ""
 "abc\\def'ghi\"jkl\a\b\f\n"
 "\r\t\v x x~ył𐄣z"
 msgstr ""
 
-#. interpret_ansic = false, interpret_unicode = true
+#. interpret_ansic = false, interpret_unicode = true, f_string = false
 msgid ""
 "abc\\\n"
 "\\\\def\\'ghi\\\"jkl\\a\\b\\f\\n\\r\\t\\v x\\040x\\x7eył\\U00010123\\N{LATIN "
 "SMALL LETTER Z}"
 msgstr ""
 
+#. interpret_ansic = true, interpret_unicode = false, f_string = true
+msgid ""
+"abc\\def'ghi\"jkl{m\\{n\\{o\\\\{p\\\\{q\\\\\\{r}s\\}t\\}u\\\\}v\\\\}w\\\\\\}"
+"x\a\b\f\n"
+"\r\t\v x x~y\\u0142\\U00010123"
+msgstr ""
+
+#. interpret_ansic = false, interpret_unicode = false, f_string = true
+msgid ""
+"Abc\\\n"
+"\\\\def\\'ghi\\\"jkl{m\\{n\\\\{o\\\\\\{p\\\\\\\\{q\\\\\\\\\\{r}s\\}t\\\\}u\\"
+"\\\\}v\\\\\\\\}w\\\\\\\\\\}x\\a\\b\\f\\n\\r\\t\\v "
+"x\\040x\\x7ey\\u0142\\U00010123"
+msgstr ""
+
 #. string concatenation
 msgid "abcdefghijkl"
 msgstr ""
diff --git a/gettext-tools/tests/xgettext-python-8 b/gettext-tools/tests/xgettext-python-8
new file mode 100755
index 000000000..9910be3c5
--- /dev/null
+++ b/gettext-tools/tests/xgettext-python-8
@@ -0,0 +1,73 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of Python f-string support.
+
+cat <<\EOF > xg-py-8.py
+s0 = _(fr'An f-string without substitutions');
+s1 = _(fr'''An f-string with
+embedded
+newlines''');
+s2 = _(fr'An f-string with {n} substitutions');
+s3 = _(fr'An f-string with several substitutions: {a} and {b} and {c} and so on');
+s4 = fr"that's a valid string. " + _('This too');
+s5 = fr'''a{fr'b{fr"c"+d}'}e''';
+s6 = _("a normal string");
+s7 = fr'abc{foo({},_('should be extracted'))}xyz';
+return _("first normal string") + fr'{foo}' + _("second normal string");
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-py-8.tmp xg-py-8.py 2>xg-py-8.err
+test $? = 0 || { cat xg-py-8.err; Exit 1; }
+func_filter_POT_Creation_Date xg-py-8.tmp xg-py-8.pot
+
+cat <<\EOF > xg-py-8.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "An f-string without substitutions"
+msgstr ""
+
+msgid ""
+"An f-string with\n"
+"embedded\n"
+"newlines"
+msgstr ""
+
+msgid "This too"
+msgstr ""
+
+msgid "a normal string"
+msgstr ""
+
+msgid "should be extracted"
+msgstr ""
+
+msgid "first normal string"
+msgstr ""
+
+msgid "second normal string"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-py-8.ok xg-py-8.pot
+result=$?
+
+exit $result