]> git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'tk/p4-metadata-coding-strategies'
author Junio C Hamano <gitster@pobox.com>
Fri, 20 May 2022 22:27:00 +0000 (15:27 -0700)
committer Junio C Hamano <gitster@pobox.com>
Fri, 20 May 2022 22:27:00 +0000 (15:27 -0700)
"git p4" updates.

* tk/p4-metadata-coding-strategies:
  git-p4: improve encoding handling to support inconsistent encodings

1  2 
git-p4.py

diff --cc git-p4.py
index c47abb4bff999838aa195b52f5583280f1142a0e,d24c3535f8ac44d286199c9abf384df6eed5ca2c..8fbf6eb1fe385090489f55c87913d0867ad8c349
+++ b/git-p4.py
@@@ -7,30 -7,15 +7,31 @@@
  #            2007 Trolltech ASA
  # License: MIT <http://www.opensource.org/licenses/mit-license.php>
  #
 -# pylint: disable=invalid-name,missing-docstring,too-many-arguments,broad-except
 -# pylint: disable=no-self-use,wrong-import-position,consider-iterating-dictionary
 -# pylint: disable=wrong-import-order,unused-import,too-few-public-methods
 -# pylint: disable=too-many-lines,ungrouped-imports,fixme,too-many-locals
 -# pylint: disable=line-too-long,bad-whitespace,superfluous-parens
 -# pylint: disable=too-many-statements,too-many-instance-attributes
 -# pylint: disable=too-many-branches,too-many-nested-blocks
 +# pylint: disable=bad-whitespace
 +# pylint: disable=broad-except
 +# pylint: disable=consider-iterating-dictionary
 +# pylint: disable=disable
 +# pylint: disable=fixme
 +# pylint: disable=invalid-name
 +# pylint: disable=line-too-long
 +# pylint: disable=missing-docstring
 +# pylint: disable=no-self-use
 +# pylint: disable=superfluous-parens
 +# pylint: disable=too-few-public-methods
 +# pylint: disable=too-many-arguments
 +# pylint: disable=too-many-branches
 +# pylint: disable=too-many-instance-attributes
 +# pylint: disable=too-many-lines
 +# pylint: disable=too-many-locals
 +# pylint: disable=too-many-nested-blocks
 +# pylint: disable=too-many-statements
 +# pylint: disable=ungrouped-imports
 +# pylint: disable=unused-import
 +# pylint: disable=wrong-import-order
 +# pylint: disable=wrong-import-position
  #
 +
+ import struct
  import sys
  if sys.version_info.major < 3 and sys.version_info.minor < 7:
      sys.stderr.write("git-p4: requires Python 2.7 or later.\n")
@@@ -69,8 -53,11 +70,11 @@@ verbose = Fals
  defaultLabelRegexp = r'[a-zA-Z0-9_\-.]+$'
  
  # The block size is reduced automatically if required
 -defaultBlockSize = 1<<20
 +defaultBlockSize = 1 << 20
  
+ defaultMetadataDecodingStrategy = 'passthrough' if sys.version_info.major == 2 else 'fallback'
+ defaultFallbackMetadataEncoding = 'cp1252'
  p4_access_checked = False
  
  re_ko_keywords = re.compile(br'\$(Id|Header)(:[^$\n]+)?\$')
@@@ -229,11 -208,72 +233,75 @@@ else
          return s.encode('utf_8') if isinstance(s, unicode) else s
  
  
+ class MetadataDecodingException(Exception):
+     def __init__(self, input_string):
+         self.input_string = input_string
+     def __str__(self):
+         return """Decoding perforce metadata failed!
+ The failing string was:
+ ---
+ {}
+ ---
+ Consider setting the git-p4.metadataDecodingStrategy config option to
+ 'fallback', to allow metadata to be decoded using a fallback encoding,
+ defaulting to cp1252.""".format(self.input_string)
+ encoding_fallback_warning_issued = False
+ encoding_escape_warning_issued = False
+ def metadata_stream_to_writable_bytes(s):
+     encodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or defaultMetadataDecodingStrategy
+     fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or defaultFallbackMetadataEncoding
+     if not isinstance(s, bytes):
+         return s.encode('utf_8')
+     if encodingStrategy == 'passthrough':
+         return s
+     try:
+         s.decode('utf_8')
+         return s
+     except UnicodeDecodeError:
+         if encodingStrategy == 'fallback' and fallbackEncoding:
+             global encoding_fallback_warning_issued
+             global encoding_escape_warning_issued
+             try:
+                 if not encoding_fallback_warning_issued:
+                     print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (fallbackEncoding, s))
+                     print("\n(this warning is only displayed once during an import)")
+                     encoding_fallback_warning_issued = True
+                 return s.decode(fallbackEncoding).encode('utf_8')
+             except Exception as exc:
+                 if not encoding_escape_warning_issued:
+                     print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (fallbackEncoding, s))
+                     print("\n(this warning is only displayed once during an import)")
+                     encoding_escape_warning_issued = True
+                 escaped_bytes = b''
+                 # bytes and strings work very differently in python2 vs python3...
+                 if str is bytes:
+                     for byte in s:
+                         byte_number = struct.unpack('>B', byte)[0]
+                         if byte_number > 127:
+                             escaped_bytes += b'%'
+                             escaped_bytes += hex(byte_number)[2:].upper()
+                         else:
+                             escaped_bytes += byte
+                 else:
+                     for byte_number in s:
+                         if byte_number > 127:
+                             escaped_bytes += b'%'
+                             escaped_bytes += hex(byte_number).upper().encode()[2:]
+                         else:
+                             escaped_bytes += bytes([byte_number])
+                 return escaped_bytes
+         raise MetadataDecodingException(s)
++
  def decode_path(path):
 -    """Decode a given string (bytes or otherwise) using configured path encoding options
 -    """
 +    """Decode a given string (bytes or otherwise) using configured path
 +       encoding options.
 +       """
 +
      encoding = gitConfig('git-p4.pathEncoding') or 'utf_8'
      if bytes is not str:
          return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path
@@@ -3229,13 -3101,13 +3310,14 @@@ class P4Sync(Command, P4UserMap)
          if userid in self.users:
              return self.users[userid]
          else:
-             return "%s <a@b>" % userid
+             userid_bytes = metadata_stream_to_writable_bytes(userid)
+             return b"%s <a@b>" % userid_bytes
  
      def streamTag(self, gitStream, labelName, labelDetails, commit, epoch):
 -        """ Stream a p4 tag.
 -        commit is either a git commit, or a fast-import mark, ":<p4commit>"
 -        """
 +        """Stream a p4 tag.
 +
 +           Commit is either a git commit, or a fast-import mark, ":<p4commit>".
 +           """
  
          if verbose:
              print("writing tag %s for commit %s" % (labelName, commit))
              email = self.make_email(owner)
          else:
              email = self.make_email(self.p4UserId())
-         tagger = "%s %s %s" % (email, epoch, self.tz)
  
-         gitStream.write("tagger %s\n" % tagger)
+         gitStream.write("tagger ")
+         gitStream.write(email)
+         gitStream.write(" %s %s\n" % (epoch, self.tz))
  
 -        print("labelDetails=",labelDetails)
 +        print("labelDetails=", labelDetails)
          if 'Description' in labelDetails:
              description = labelDetails['Description']
          else:
@@@ -4257,9 -4138,16 +4340,17 @@@ class P4Clone(P4Sync)
          if self.useClientSpec_from_options:
              system(["git", "config", "--bool", "git-p4.useclientspec", "true"])
  
+         # persist any git-p4 encoding-handling config options passed in for clone:
+         if gitConfig('git-p4.metadataDecodingStrategy'):
+             system(["git", "config", "git-p4.metadataDecodingStrategy", gitConfig('git-p4.metadataDecodingStrategy')])
+         if gitConfig('git-p4.metadataFallbackEncoding'):
+             system(["git", "config", "git-p4.metadataFallbackEncoding", gitConfig('git-p4.metadataFallbackEncoding')])
+         if gitConfig('git-p4.pathEncoding'):
+             system(["git", "config", "git-p4.pathEncoding", gitConfig('git-p4.pathEncoding')])
          return True
  
 +
  class P4Unshelve(Command):
      def __init__(self):
          Command.__init__(self)