--- /dev/null
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#: comments.py:4
+msgid "foo"
+msgstr ""
+
+#. i18n: This is a translator comment
+#: comments.py:7
+msgid "bar"
+msgstr ""
+
+#. i18n: This is a translator comment
+#. i18n: This is another translator comment
+#: comments.py:11
+msgid "baz"
+msgstr ""
+
+#. i18n: This is a translator comment
+#. with multiple
+#. lines
+#: comments.py:16
+msgid "qux"
+msgstr ""
+
+#. i18n: This is a translator comment
+#: comments.py:21
+msgid "quux"
+msgstr ""
+
+#. i18n: This is a translator comment
+#. with multiple lines
+#. i18n: This is another translator comment
+#. with multiple lines
+#: comments.py:27
+msgid "corge"
+msgstr ""
+
+#: comments.py:31
+msgid "grault"
+msgstr ""
+
+#. i18n: This is another translator comment
+#: comments.py:36
+msgid "garply"
+msgstr ""
+
+#: comments.py:40
+msgid "george"
+msgstr ""
+
+#. i18n: This is another translator comment
+#: comments.py:45
+msgid "waldo"
+msgstr ""
+
+#. i18n: This is a translator comment
+#. i18n: This is also a translator comment
+#. i18n: This is another translator comment
+#: comments.py:50
+msgid "waldo2"
+msgstr ""
+
+#. i18n: This is a translator comment
+#. i18n: This is another translator comment
+#. i18n: This is yet another translator comment
+#. i18n: This is a translator comment
+#. with multiple lines
+#: comments.py:53 comments.py:56 comments.py:59 comments.py:63
+msgid "fred"
+msgstr ""
+
+#: comments.py:65
+msgid "plugh"
+msgstr ""
+
+#: comments.py:67
+msgid "foobar"
+msgstr ""
+
+#. i18n: This is a translator comment
+#: comments.py:71
+msgid "xyzzy"
+msgstr ""
+
+#: comments.py:72
+msgid "thud"
+msgstr ""
+
+#. i18n: This is a translator comment
+#. i18n: This is another translator comment
+#. i18n: This is yet another translator comment
+#: comments.py:78
+msgid "foos"
+msgstr ""
+
--- /dev/null
+from gettext import gettext as _
+
+# Not a translator comment
+_('foo')
+
+# i18n: This is a translator comment
+_('bar')
+
+# i18n: This is a translator comment
+# i18n: This is another translator comment
+_('baz')
+
+# i18n: This is a translator comment
+# with multiple
+# lines
+_('qux')
+
+# This comment should not be included because
+# it does not start with the prefix
+# i18n: This is a translator comment
+_('quux')
+
+# i18n: This is a translator comment
+# with multiple lines
+# i18n: This is another translator comment
+# with multiple lines
+_('corge')
+
+# i18n: This comment should be ignored
+
+_('grault')
+
+# i18n: This comment should be ignored
+
+# i18n: This is another translator comment
+_('garply')
+
+# i18n: This comment should be ignored
+x = 1
+_('george')
+
+# i18n: This comment should be ignored
+x = 1
+# i18n: This is another translator comment
+_('waldo')
+
+# i18n: This is a translator comment
+x = 1 # i18n: This is also a translator comment
+# i18n: This is another translator comment
+_('waldo2')
+
+# i18n: This is a translator comment
+_('fred')
+
+# i18n: This is another translator comment
+_('fred')
+
+# i18n: This is yet another translator comment
+_('fred')
+
+# i18n: This is a translator comment
+# with multiple lines
+_('fred')
+
+_('plugh') # i18n: This comment should be ignored
+
+_('foo' # i18n: This comment should be ignored
+ 'bar') # i18n: This comment should be ignored
+
+# i18n: This is a translator comment
+_('xyzzy')
+_('thud')
+
+
+## i18n: This is a translator comment
+# # i18n: This is another translator comment
+### ### i18n: This is yet another translator comment
+_('foos')
self.maxDiff = None
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))
- def extract_from_str(self, module_content, *, args=(), strict=True, with_stderr=False):
+ def extract_from_str(self, module_content, *, args=(), strict=True,
+ with_stderr=False, raw=False):
"""Return all msgids extracted from module_content."""
filename = 'test.py'
with temp_cwd(None):
self.assertEqual(res.err, b'')
with open('messages.pot', encoding='utf-8') as fp:
data = fp.read()
- msgids = self.get_msgids(data)
+ if not raw:
+ data = self.get_msgids(data)
if not with_stderr:
- return msgids
- return msgids, res.err
+ return data
+ return data, res.err
def extract_docstrings_from_str(self, module_content):
"""Return all docstrings extracted from module_content."""
contents = input_file.read_text(encoding='utf-8')
with temp_cwd(None):
Path(input_file.name).write_text(contents)
- assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
+ assert_python_ok('-Xutf8', self.script, '--docstrings',
+ '--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')
expected = output_file.read_text(encoding='utf-8')
"*** test.py:3: Variable positional arguments are not allowed in gettext calls\n"
)
def test_extract_all_comments(self):
    """
    Check that --add-comments (or -c) given without an explicit
    tag extracts every comment preceding a gettext call.
    """
    source = dedent('''\
    # Translator comment
    _("foo")
    ''')
    for arg in ('--add-comments', '-c'):
        with self.subTest(arg=arg):
            # raw=True returns the POT text itself rather than the msgids,
            # which is needed to look for the '#.' comment lines.
            pot = self.extract_from_str(source, args=(arg,), raw=True)
            self.assertIn('#. Translator comment', pot)
+
def test_comments_with_multiple_tags(self):
    """
    Check that several --add-comments tags can be passed and that
    only comments starting with one of them are extracted.
    """
    source = dedent('''\
    # foo: comment
    _("foo")

    # bar: comment
    _("bar")

    # baz: comment
    _("baz")
    ''')
    for arg in ('--add-comments={}', '-c{}'):
        with self.subTest(arg=arg):
            # Build one option per requested tag, e.g. ('-cfoo:', '-cbar:').
            tag_options = tuple(arg.format(tag) for tag in ('foo:', 'bar:'))
            pot = self.extract_from_str(source, args=tag_options, raw=True)
            self.assertIn('#. foo: comment', pot)
            self.assertIn('#. bar: comment', pot)
            # 'baz:' was never requested, so its comment must be absent.
            self.assertNotIn('#. baz: comment', pot)
+
def test_comments_not_extracted_without_tags(self):
    """
    Check that no translator comments are written when
    --add-comments is not given at all.
    """
    source = dedent('''\
    # Translator comment
    _("foo")
    ''')
    pot = self.extract_from_str(source, raw=True)
    # Any extracted comment would show up as a '#.' line in the POT text.
    self.assertNotIn('#.', pot)
+
def update_POT_snapshots():
for input_file in DATA_DIR.glob('*.py'):
contents = input_file.read_bytes()
with temp_cwd(None):
Path(input_file.name).write_bytes(contents)
- assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name)
+ assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings',
+ '--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')
output = normalize_POT_file(output)
--- /dev/null
+Add support for translator comments in :program:`pygettext.py`.
--extract-all
Extract all strings.
+ -c[TAG]
+ --add-comments[=TAG]
+ Extract translator comments. Comments must start with TAG and
+ must precede the gettext call. Multiple -cTAG options are allowed.
+ In that case, any comment matching any of the TAGs will be extracted.
+ If TAG is omitted, all comments immediately preceding a gettext
+ call are extracted.
+
-d name
--default-domain=name
Rename the default output file from messages.pot to name.pot.
import os
import sys
import time
+import tokenize
from dataclasses import dataclass, field
+from io import BytesIO
from operator import itemgetter
__version__ = '1.5'
msgctxt: str | None
locations: set[Location] = field(default_factory=set)
is_docstring: bool = False
+ comments: list[str] = field(default_factory=list)
- def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False):
def add_location(self, filename, lineno, msgid_plural=None, *,
                 is_docstring=False, comments=None):
    """Record one more occurrence of this message.

    The (filename, lineno) pair is added to the set of locations.
    msgid_plural is only stored if no plural form has been recorded
    yet, is_docstring is OR-ed into the existing flag, and any
    comments given are appended to the comments already collected.
    """
    location = Location(filename, lineno)
    self.locations.add(location)
    if self.msgid_plural is None:
        # The first plural form seen for this message wins.
        self.msgid_plural = msgid_plural
    self.is_docstring = self.is_docstring | is_docstring
    # None and empty sequences both mean "nothing to add".
    self.comments += comments or []
+
+
def get_source_comments(source):
    """
    Return a dictionary mapping line numbers to
    comments in the source code.

    *source* is the source code as bytes.  Keys are the 1-based line
    numbers on which a comment starts; values are the comment text
    with the leading run of '#' characters and whitespace removed.
    Trailing whitespace is removed as well, so that it does not end
    up in the '#.' lines written to the POT file.
    """
    comments = {}
    for token in tokenize.tokenize(BytesIO(source).readline):
        if token.type == tokenize.COMMENT:
            # Remove any leading combination of '#' and whitespace
            # (so '## tag:' and '# # tag:' both become 'tag:'), then
            # drop whitespace left dangling at the end of the line.
            comment = token.string.lstrip('# \t').rstrip()
            comments[token.start[0]] = comment
    return comments
class GettextVisitor(ast.NodeVisitor):
self.options = options
self.filename = None
self.messages = {}
+ self.comments = {}
+
def visit_file(self, source, filename):
    """Parse *source* and visit the resulting AST.

    Source that cannot be parsed is skipped: nothing is recorded and
    the visitor state is left untouched.  Otherwise *filename* becomes
    the current file, the comments of the file are collected when at
    least one comment tag is configured, and the module is visited.
    """
    try:
        tree = ast.parse(source)
    except SyntaxError:
        # Files with syntax errors are ignored without a diagnostic.
        return

    self.filename = filename
    tags = self.options.comment_tags
    if tags:
        # Tokenizing is only worth doing when comments were requested.
        self.comments = get_source_comments(source)
    self.visit(tree)
def visit_Module(self, node):
self._extract_docstring(node)
msg_data[arg_type] = arg.value
lineno = node.lineno
- self._add_message(lineno, **msg_data)
+ comments = self._extract_comments(node)
+ self._add_message(lineno, **msg_data, comments=comments)
+
def _extract_comments(self, node):
    """Return the translator comments belonging to a gettext call.

    Only the unbroken run of comment lines directly above the call
    is considered.  Within that run, everything from the first
    comment starting with one of the --add-comments=TAG prefixes
    onwards is returned; comments above it are dropped.  An empty
    list is returned when no tags are configured or when no comment
    in the run carries a tag.  See the tests for examples.
    """
    if not self.options.comment_tags:
        return []

    # Walk upwards from the line above the call until a line
    # without a comment (or the top of the file) is reached.
    preceding = []
    line = node.lineno - 1
    while line >= 1 and (text := self.comments.get(line)) is not None:
        preceding.append(text)
        line -= 1
    # The lines were gathered bottom-up; restore source order.
    preceding.reverse()

    for index, text in enumerate(preceding):
        if self._is_translator_comment(text):
            return preceding[index:]
    return []
+
def _is_translator_comment(self, comment):
    """Return True if *comment* starts with one of the comment tags."""
    tags = self.options.comment_tags
    # str.startswith() accepts a tuple of prefixes and matches any of them.
    return comment.startswith(tags)
def _add_message(
self, lineno, msgid, msgid_plural=None, msgctxt=None, *,
- is_docstring=False):
+ is_docstring=False, comments=None):
if msgid in self.options.toexclude:
return
+ if not comments:
+ comments = []
+
key = self._key_for(msgid, msgctxt)
message = self.messages.get(key)
if message:
lineno,
msgid_plural,
is_docstring=is_docstring,
+ comments=comments,
)
else:
self.messages[key] = Message(
msgctxt=msgctxt,
locations={Location(self.filename, lineno)},
is_docstring=is_docstring,
+ comments=comments,
)
@staticmethod
for key, locations in sorted_keys:
msg = messages[key]
+
+ for comment in msg.comments:
+ print(f'#. {comment}', file=fp)
+
if options.writelocations:
# location comments are different b/w Solaris and GNU:
if options.locationstyle == options.SOLARIS:
try:
opts, args = getopt.getopt(
sys.argv[1:],
- 'ad:DEhk:Kno:p:S:Vvw:x:X:',
- ['extract-all', 'default-domain=', 'escape', 'help',
- 'keyword=', 'no-default-keywords',
+ 'ac::d:DEhk:Kno:p:S:Vvw:x:X:',
+ ['extract-all', 'add-comments=?', 'default-domain=', 'escape',
+ 'help', 'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
'docstrings', 'no-docstrings',
excludefilename = ''
docstrings = 0
nodocstrings = {}
+ comment_tags = set()
options = Options()
locations = {'gnu' : options.GNU,
usage(0)
elif opt in ('-a', '--extract-all'):
options.extractall = 1
+ elif opt in ('-c', '--add-comments'):
+ options.comment_tags.add(arg)
elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
finally:
fp.close()
+ options.comment_tags = tuple(options.comment_tags)
+
# calculate escapes
make_escapes(not options.escape)
with open(filename, 'rb') as fp:
source = fp.read()
- try:
- module_tree = ast.parse(source)
- except SyntaxError:
- continue
-
- visitor.visit_file(module_tree, filename)
+ visitor.visit_file(source, filename)
# write the output
if options.outfile == '-':