Stripping of comment tags is optional now. If enabled it will strip the tags from...

author Armin Ronacher <armin.ronacher@active-4.com>

Thu, 12 Jun 2008 16:24:25 +0000 (16:24 +0000)

committer Armin Ronacher <armin.ronacher@active-4.com>

Thu, 12 Jun 2008 16:24:25 +0000 (16:24 +0000)
author Armin Ronacher <armin.ronacher@active-4.com>
Thu, 12 Jun 2008 16:24:25 +0000 (16:24 +0000)
committer Armin Ronacher <armin.ronacher@active-4.com>
Thu, 12 Jun 2008 16:24:25 +0000 (16:24 +0000)
diff --git a/ChangeLog b/ChangeLog

index fef46cf1480b162e01f8d17ce3268145f88bf069..6bf399d6316cdabd91163fc18dd518a620626995 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,7 +2,9 @@ Version 0.9.3
  http://svn.edgewall.org/repos/babel/tags/0.9.3/
  (?, from branches/stable/0.9.x)
  
-* Fixed invalid message extraction methods causing an UnboundLocalError.
+ * Fixed invalid message extraction methods causing an UnboundLocalError.
+ * The stripping of the comment tags in comments is optional now and
+   is done for each line in a comment.
         
  
  Version 0.9.2
diff --git a/babel/messages/extract.py b/babel/messages/extract.py

index 7383a097e42b2e57e0a50437c00888b4e3f4f1f6..6f8a0d36f9db171cf538288e4b4f225437bd6693 100644 (file)
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -30,6 +30,7 @@ import sys
  from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
  
  from babel.util import parse_encoding, pathmatch, relpath
+from textwrap import dedent
  
  __all__ = ['extract', 'extract_from_dir', 'extract_from_file']
  __docformat__ = 'restructuredtext en'
@@ -53,9 +54,21 @@ empty_msgid_warning = (
  '%s: warning: Empty msgid.  It is reserved by GNU gettext: gettext("") '
  'returns the header entry with meta information, not the empty string.')
  
+
+def _strip_comment_tags(comments, tags):
+    """Helper function for `extract` that strips comment tags from strings
+    in a list of comment lines.  This function operates in-place.
+    """
+    def _strip(line):
+        for tag in tags:
+            if line.startswith(tag):
+                return line[len(tag):].strip()
+        return line
+    comments[:] = map(_strip, comments)
+
  def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
                       options_map=None, keywords=DEFAULT_KEYWORDS,
-                     comment_tags=(), callback=None):
+                     comment_tags=(), callback=None, strip_comment_tags=False):
      """Extract messages from any source files found in the given directory.
  
      This function generates tuples of the form:
@@ -118,6 +131,8 @@ def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
                       performed; the function is passed the filename, the name
                       of the extraction method and and the options dictionary as
                       positional arguments, in that order
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
      :return: an iterator over ``(filename, lineno, funcname, message)`` tuples
      :rtype: ``iterator``
      :see: `pathmatch`
@@ -147,15 +162,17 @@ def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
                      if callback:
                          callback(filename, method, options)
                      for lineno, message, comments in \
-                                  extract_from_file(method, filepath,
-                                                    keywords=keywords,
-                                                    comment_tags=comment_tags,
-                                                    options=options):
+                          extract_from_file(method, filepath,
+                                            keywords=keywords,
+                                            comment_tags=comment_tags,
+                                            options=options,
+                                            strip_comment_tags=
+                                                strip_comment_tags):
                          yield filename, lineno, message, comments
                      break
  
  def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
-                      comment_tags=(), options=None):
+                      comment_tags=(), options=None, strip_comment_tags=False):
      """Extract messages from a specific file.
  
      This function returns a list of tuples of the form:
@@ -170,18 +187,21 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
                       localizable strings
      :param comment_tags: a list of translator tags to search for and include
                           in the results
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
      :param options: a dictionary of additional options (optional)
      :return: the list of extracted messages
      :rtype: `list`
      """
      fileobj = open(filename, 'U')
      try:
-        return list(extract(method, fileobj, keywords, comment_tags, options))
+        return list(extract(method, fileobj, keywords, comment_tags, options,
+                            strip_comment_tags))
      finally:
          fileobj.close()
  
  def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
-            options=None):
+            options=None, strip_comment_tags=False):
      """Extract messages from the given file-like object using the specified
      extraction method.
  
@@ -216,6 +236,8 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
      :param comment_tags: a list of translator tags to search for and include
                           in the results
      :param options: a dictionary of additional options (optional)
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
      :return: the list of extracted messages
      :rtype: `list`
      :raise ValueError: if the extraction method is not registered
@@ -291,6 +313,10 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
          messages = tuple(msgs)
          if len(messages) == 1:
              messages = messages[0]
+
+        if strip_comment_tags:
+            _strip_comment_tags(comments, comment_tags)
+
          yield lineno, messages, comments
  
  def extract_nothing(fileobj, keywords, comment_tags, options):
@@ -318,6 +344,7 @@ def extract_python(fileobj, keywords, comment_tags, options):
      messages = []
      translator_comments = []
      in_def = in_translator_comments = False
+    comment_tag = None
  
      encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')
  
@@ -344,8 +371,6 @@ def extract_python(fileobj, keywords, comment_tags, options):
              if in_translator_comments and \
                      translator_comments[-1][0] == lineno - 1:
                  # We're already inside a translator comment, continue appending
-                # XXX: Should we check if the programmer keeps adding the
-                # comment_tag for every comment line??? probably not!
                  translator_comments.append((lineno, value))
                  continue
              # If execution reaches this point, let's see if comment line
@@ -353,8 +378,7 @@ def extract_python(fileobj, keywords, comment_tags, options):
              for comment_tag in comment_tags:
                  if value.startswith(comment_tag):
                      in_translator_comments = True
-                    comment = value[len(comment_tag):].strip()
-                    translator_comments.append((lineno, comment))
+                    translator_comments.append((lineno, value))
                      break
          elif funcname and call_stack == 0:
              if tok == OP and value == ')':
diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py

index 19cf8459015221d50164c3fd63386c9a61be5fba..d82f181b1da7f90a3e629d7209d078f71b3306ef 100755 (executable)
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -223,12 +223,14 @@ class extract_messages(Command):
          ('add-comments=', 'c',
           'place comment block with TAG (or those preceding keyword lines) in '
           'output file. Seperate multiple TAGs with commas(,)'),
+        ('strip-comments', None,
+         'strip the comment TAGs from the comments.'),
          ('input-dirs=', None,
           'directories that should be scanned for messages'),
      ]
      boolean_options = [
          'no-default-keywords', 'no-location', 'omit-header', 'no-wrap',
-        'sort-output', 'sort-by-file'
+        'sort-output', 'sort-by-file', 'strip-comments'
      ]
  
      def initialize_options(self):
@@ -249,6 +251,7 @@ class extract_messages(Command):
          self.copyright_holder = None
          self.add_comments = None
          self._add_comments = []
+        self.strip_comments = False
  
      def finalize_options(self):
          if self.no_default_keywords and not self.keywords:
@@ -305,7 +308,9 @@ class extract_messages(Command):
                  extracted = extract_from_dir(dirname, method_map, options_map,
                                               keywords=self._keywords,
                                               comment_tags=self._add_comments,
-                                             callback=callback)
+                                             callback=callback,
+                                             strip_comment_tags=
+                                                self.strip_comments)
                  for filename, lineno, message, comments in extracted:
                      filepath = os.path.normpath(os.path.join(dirname, filename))
                      catalog.add(message, None, [(filepath, lineno)],
@@ -816,12 +821,15 @@ class CommandLineInterface(object):
                            help='place comment block with TAG (or those '
                                 'preceding keyword lines) in output file. One '
                                 'TAG per argument call')
+        parser.add_option('--strip-comment-tags', '-s',
+                          dest='strip_comment_tags', action='store_true',
+                          help='Strip the comment tags from the comments.')
  
          parser.set_defaults(charset='utf-8', keywords=[],
                              no_default_keywords=False, no_location=False,
                              omit_header = False, width=76, no_wrap=False,
                              sort_output=False, sort_by_file=False,
-                            comment_tags=[])
+                            comment_tags=[], strip_comment_tags=False)
          options, args = parser.parse_args(argv)
          if not args:
              parser.error('incorrect number of arguments')
@@ -883,7 +891,9 @@ class CommandLineInterface(object):
  
                  extracted = extract_from_dir(dirname, method_map, options_map,
                                               keywords, options.comment_tags,
-                                             callback=callback)
+                                             callback=callback,
+                                             strip_comment_tags=
+                                                options.strip_comment_tags)
                  for filename, lineno, message, comments in extracted:
                      filepath = os.path.normpath(os.path.join(dirname, filename))
                      catalog.add(message, None, [(filepath, lineno)],
diff --git a/babel/messages/tests/extract.py b/babel/messages/tests/extract.py

index a58d25e116bf09cbee86a79ae9fac6e1f8920879..57d5703aec7565518b2f3f46bf5516ad8dc5729c 100644 (file)
--- a/babel/messages/tests/extract.py
+++ b/babel/messages/tests/extract.py
@@ -145,7 +145,7 @@ msg = _(u'Foo Bar')
  """)
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
          self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'A translation comment'], messages[0][3])
+        self.assertEqual([u'NOTE: A translation comment'], messages[0][3])
  
      def test_comment_tag_multiline(self):
          buf = StringIO("""
@@ -155,7 +155,7 @@ msg = _(u'Foo Bar')
  """)
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
          self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'A translation comment', u'with a second line'],
+        self.assertEqual([u'NOTE: A translation comment', u'with a second line'],
                           messages[0][3])
  
      def test_translator_comments_with_previous_non_translator_comments(self):
@@ -168,7 +168,7 @@ msg = _(u'Foo Bar')
  """)
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
          self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'A translation comment', u'with a second line'],
+        self.assertEqual([u'NOTE: A translation comment', u'with a second line'],
                           messages[0][3])
  
      def test_comment_tags_not_on_start_of_comment(self):
@@ -181,7 +181,7 @@ msg = _(u'Foo Bar')
  """)
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
          self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'This one will be'], messages[0][3])
+        self.assertEqual([u'NOTE: This one will be'], messages[0][3])
  
      def test_multiple_comment_tags(self):
          buf = StringIO("""
@@ -195,10 +195,10 @@ msg = _(u'Foo Bar2')
          messages = list(extract.extract_python(buf, ('_',),
                                                 ['NOTE1:', 'NOTE2:'], {}))
          self.assertEqual(u'Foo Bar1', messages[0][2])
-        self.assertEqual([u'A translation comment for tag1',
+        self.assertEqual([u'NOTE1: A translation comment for tag1',
                            u'with a second line'], messages[0][3])
          self.assertEqual(u'Foo Bar2', messages[1][2])
-        self.assertEqual([u'A translation comment for tag2'], messages[1][3])
+        self.assertEqual([u'NOTE2: A translation comment for tag2'], messages[1][3])
  
      def test_two_succeeding_comments(self):
          buf = StringIO("""
@@ -208,7 +208,7 @@ msg = _(u'Foo Bar')
  """)
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
          self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'one', u'NOTE: two'], messages[0][3])
+        self.assertEqual([u'NOTE: one', u'NOTE: two'], messages[0][3])
  
      def test_invalid_translator_comments(self):
          buf = StringIO("""
@@ -234,7 +234,7 @@ hello = _('Hello')
  """)
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
          self.assertEqual(u'Hi there!', messages[0][2])
-        self.assertEqual([u'Hi!'], messages[0][3])
+        self.assertEqual([u'NOTE: Hi!'], messages[0][3])
          self.assertEqual(u'Hello', messages[1][2])
          self.assertEqual([], messages[1][3])
  
@@ -274,7 +274,7 @@ msg = _('Bonjour à tous')
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'],
                                                 {'encoding': 'utf-8'}))
          self.assertEqual(u'Bonjour à tous', messages[0][2])
-        self.assertEqual([u'hello'], messages[0][3])
+        self.assertEqual([u'NOTE: hello'], messages[0][3])
  
      def test_utf8_message_with_magic_comment(self):
          buf = StringIO("""# -*- coding: utf-8 -*-
@@ -283,7 +283,7 @@ msg = _('Bonjour à tous')
  """)
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
          self.assertEqual(u'Bonjour à tous', messages[0][2])
-        self.assertEqual([u'hello'], messages[0][3])
+        self.assertEqual([u'NOTE: hello'], messages[0][3])
  
      def test_utf8_message_with_utf8_bom(self):
          buf = StringIO(codecs.BOM_UTF8 + """
@@ -292,7 +292,7 @@ msg = _('Bonjour à tous')
  """)
          messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
          self.assertEqual(u'Bonjour à tous', messages[0][2])
-        self.assertEqual([u'hello'], messages[0][3])
+        self.assertEqual([u'NOTE: hello'], messages[0][3])
  
      def test_utf8_raw_strings_match_unicode_strings(self):
          buf = StringIO(codecs.BOM_UTF8 + """
@@ -303,6 +303,24 @@ msgu = _(u'Bonjour à tous')
          self.assertEqual(u'Bonjour à tous', messages[0][2])
          self.assertEqual(messages[0][2], messages[1][2])
  
+    def test_extract_strip_comment_tags(self):
+        buf = StringIO("""\
+#: This is a comment with a very simple
+#: prefix specified
+_('Servus')
+
+# NOTE: This is a multiline comment with
+# a prefix too
+_('Babatschi')""")
+        messages = list(extract.extract('python', buf, comment_tags=['NOTE:', ':'],
+                                        strip_comment_tags=True))
+        self.assertEqual(u'Servus', messages[0][1])
+        self.assertEqual([u'This is a comment with a very simple',
+                          u'prefix specified'], messages[0][2])
+        self.assertEqual(u'Babatschi', messages[1][1])
+        self.assertEqual([u'This is a multiline comment with',
+                          u'a prefix too'], messages[1][2])
+
  class ExtractTestCase(unittest.TestCase):
  
      def test_invalid_filter(self):
diff --git a/babel/messages/tests/frontend.py b/babel/messages/tests/frontend.py

index c42bf9e65d3b32af3f287c895bd3a363fb1bb9cf..087514317efdd35c605818698831c8803c10a72b 100644 (file)
--- a/babel/messages/tests/frontend.py
+++ b/babel/messages/tests/frontend.py
@@ -129,7 +129,7 @@ msgstr ""
  "Content-Transfer-Encoding: 8bit\n"
  "Generated-By: Babel %(version)s\n"
  
-#. This will be a translator coment,
+#. TRANSLATOR: This will be a translator coment,
  #. that will include several lines
  #: project/file1.py:8
  msgid "bar"
@@ -187,7 +187,7 @@ msgstr ""
  "Content-Transfer-Encoding: 8bit\n"
  "Generated-By: Babel %(version)s\n"
  
-#. This will be a translator coment,
+#. TRANSLATOR: This will be a translator coment,
  #. that will include several lines
  #: project/file1.py:8
  msgid "bar"
@@ -244,7 +244,7 @@ msgstr ""
  "Content-Transfer-Encoding: 8bit\n"
  "Generated-By: Babel %(version)s\n"
  
-#. This will be a translator coment,
+#. TRANSLATOR: This will be a translator coment,
  #. that will include several lines
  #: project/file1.py:8
  msgid "bar"
author	Armin Ronacher <armin.ronacher@active-4.com>
	Thu, 12 Jun 2008 16:24:25 +0000 (16:24 +0000)
committer	Armin Ronacher <armin.ronacher@active-4.com>
	Thu, 12 Jun 2008 16:24:25 +0000 (16:24 +0000)
ChangeLog		patch \| blob \| blame \| history
babel/messages/extract.py		patch \| blob \| blame \| history
babel/messages/frontend.py		patch \| blob \| blame \| history
babel/messages/tests/extract.py		patch \| blob \| blame \| history
babel/messages/tests/frontend.py		patch \| blob \| blame \| history