From: Junio C Hamano Date: Fri, 20 May 2022 22:27:00 +0000 (-0700) Subject: Merge branch 'tk/p4-metadata-coding-strategies' X-Git-Tag: v2.37.0-rc0~66 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3af1df0415741c85f84e78e281339186be1d05d7;p=thirdparty%2Fgit.git Merge branch 'tk/p4-metadata-coding-strategies' "git p4" updates. * tk/p4-metadata-coding-strategies: git-p4: improve encoding handling to support inconsistent encodings --- 3af1df0415741c85f84e78e281339186be1d05d7 diff --cc git-p4.py index c47abb4bff,d24c3535f8..8fbf6eb1fe --- a/git-p4.py +++ b/git-p4.py @@@ -7,30 -7,15 +7,31 @@@ # 2007 Trolltech ASA # License: MIT # -# pylint: disable=invalid-name,missing-docstring,too-many-arguments,broad-except -# pylint: disable=no-self-use,wrong-import-position,consider-iterating-dictionary -# pylint: disable=wrong-import-order,unused-import,too-few-public-methods -# pylint: disable=too-many-lines,ungrouped-imports,fixme,too-many-locals -# pylint: disable=line-too-long,bad-whitespace,superfluous-parens -# pylint: disable=too-many-statements,too-many-instance-attributes -# pylint: disable=too-many-branches,too-many-nested-blocks +# pylint: disable=bad-whitespace +# pylint: disable=broad-except +# pylint: disable=consider-iterating-dictionary +# pylint: disable=disable +# pylint: disable=fixme +# pylint: disable=invalid-name +# pylint: disable=line-too-long +# pylint: disable=missing-docstring +# pylint: disable=no-self-use +# pylint: disable=superfluous-parens +# pylint: disable=too-few-public-methods +# pylint: disable=too-many-arguments +# pylint: disable=too-many-branches +# pylint: disable=too-many-instance-attributes +# pylint: disable=too-many-lines +# pylint: disable=too-many-locals +# pylint: disable=too-many-nested-blocks +# pylint: disable=too-many-statements +# pylint: disable=ungrouped-imports +# pylint: disable=unused-import +# pylint: disable=wrong-import-order +# pylint: disable=wrong-import-position # + + import struct import sys if 
sys.version_info.major < 3 and sys.version_info.minor < 7: sys.stderr.write("git-p4: requires Python 2.7 or later.\n") @@@ -69,8 -53,11 +70,11 @@@ verbose = Fals defaultLabelRegexp = r'[a-zA-Z0-9_\-.]+$' # The block size is reduced automatically if required -defaultBlockSize = 1<<20 +defaultBlockSize = 1 << 20 + defaultMetadataDecodingStrategy = 'passthrough' if sys.version_info.major == 2 else 'fallback' + defaultFallbackMetadataEncoding = 'cp1252' + p4_access_checked = False re_ko_keywords = re.compile(br'\$(Id|Header)(:[^$\n]+)?\$') @@@ -229,11 -208,72 +233,75 @@@ else return s.encode('utf_8') if isinstance(s, unicode) else s + class MetadataDecodingException(Exception): + def __init__(self, input_string): + self.input_string = input_string + + def __str__(self): + return """Decoding perforce metadata failed! + The failing string was: + --- + {} + --- + Consider setting the git-p4.metadataDecodingStrategy config option to + 'fallback', to allow metadata to be decoded using a fallback encoding, + defaulting to cp1252.""".format(self.input_string) + + + encoding_fallback_warning_issued = False + encoding_escape_warning_issued = False + def metadata_stream_to_writable_bytes(s): + encodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or defaultMetadataDecodingStrategy + fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or defaultFallbackMetadataEncoding + if not isinstance(s, bytes): + return s.encode('utf_8') + if encodingStrategy == 'passthrough': + return s + try: + s.decode('utf_8') + return s + except UnicodeDecodeError: + if encodingStrategy == 'fallback' and fallbackEncoding: + global encoding_fallback_warning_issued + global encoding_escape_warning_issued + try: + if not encoding_fallback_warning_issued: + print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (fallbackEncoding, s)) + print("\n(this warning is only displayed once during an import)") + encoding_fallback_warning_issued = True + return 
s.decode(fallbackEncoding).encode('utf_8') + except Exception as exc: + if not encoding_escape_warning_issued: + print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (fallbackEncoding, s)) + print("\n(this warning is only displayed once during an import)") + encoding_escape_warning_issued = True + escaped_bytes = b'' + # bytes and strings work very differently in python2 vs python3... + if str is bytes: + for byte in s: + byte_number = struct.unpack('>B', byte)[0] + if byte_number > 127: + escaped_bytes += b'%' + escaped_bytes += hex(byte_number)[2:].upper() + else: + escaped_bytes += byte + else: + for byte_number in s: + if byte_number > 127: + escaped_bytes += b'%' + escaped_bytes += hex(byte_number).upper().encode()[2:] + else: + escaped_bytes += bytes([byte_number]) + return escaped_bytes + + raise MetadataDecodingException(s) + ++ def decode_path(path): - """Decode a given string (bytes or otherwise) using configured path encoding options - """ + """Decode a given string (bytes or otherwise) using configured path + encoding options. + """ + encoding = gitConfig('git-p4.pathEncoding') or 'utf_8' if bytes is not str: return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path @@@ -3229,13 -3101,13 +3310,14 @@@ class P4Sync(Command, P4UserMap) if userid in self.users: return self.users[userid] else: - return "%s <a@b>" % userid + userid_bytes = metadata_stream_to_writable_bytes(userid) + return b"%s <a@b>" % userid_bytes def streamTag(self, gitStream, labelName, labelDetails, commit, epoch): - """ Stream a p4 tag. - commit is either a git commit, or a fast-import mark, ":<p4commit>" - """ + """Stream a p4 tag. + + Commit is either a git commit, or a fast-import mark, ":<p4commit>".
+ """ if verbose: print("writing tag %s for commit %s" % (labelName, commit)) @@@ -3253,11 -3125,12 +3335,12 @@@ email = self.make_email(owner) else: email = self.make_email(self.p4UserId()) - tagger = "%s %s %s" % (email, epoch, self.tz) - gitStream.write("tagger %s\n" % tagger) + gitStream.write("tagger ") + gitStream.write(email) + gitStream.write(" %s %s\n" % (epoch, self.tz)) - print("labelDetails=",labelDetails) + print("labelDetails=", labelDetails) if 'Description' in labelDetails: description = labelDetails['Description'] else: @@@ -4257,9 -4138,16 +4340,17 @@@ class P4Clone(P4Sync) if self.useClientSpec_from_options: system(["git", "config", "--bool", "git-p4.useclientspec", "true"]) + # persist any git-p4 encoding-handling config options passed in for clone: + if gitConfig('git-p4.metadataDecodingStrategy'): + system(["git", "config", "git-p4.metadataDecodingStrategy", gitConfig('git-p4.metadataDecodingStrategy')]) + if gitConfig('git-p4.metadataFallbackEncoding'): + system(["git", "config", "git-p4.metadataFallbackEncoding", gitConfig('git-p4.metadataFallbackEncoding')]) + if gitConfig('git-p4.pathEncoding'): + system(["git", "config", "git-p4.pathEncoding", gitConfig('git-p4.pathEncoding')]) + return True + class P4Unshelve(Command): def __init__(self): Command.__init__(self)