.. versionadded:: 3.15
+.. function:: a2b_base32(string, /, *, alphabet=BASE32_ALPHABET)
+
+ Convert base32 data back to binary and return the binary data.
+
+ Valid base32 data contains characters from the base32 alphabet specified
+ in :rfc:`4648` in groups of eight (if necessary, the final group is padded
+ to eight characters with ``=``). Each group encodes 40 bits of binary data
+ in the range from ``0`` to ``2 ** 40 - 1``, inclusive.
+
+ .. note::
+ This function does not map lowercase characters (which are invalid in
+ standard base32) to their uppercase counterparts, nor does it
+ contextually map ``0`` to ``O`` and ``1`` to ``I``/``L`` as :rfc:`4648`
+ allows.
+
+ Optional *alphabet* must be a :class:`bytes` object of length 32 which
+ specifies an alternative alphabet.
+
+ Invalid base32 data will raise :exc:`binascii.Error`.
+
+ .. versionadded:: next
+
+.. function:: b2a_base32(data, /, *, alphabet=BASE32_ALPHABET)
+
+ Convert binary data to a line of ASCII characters in base32 coding,
+ as specified in :rfc:`4648`. The return value is the converted line.
+
+ Optional *alphabet* must be a :term:`bytes-like object` of length 32 which
+ specifies an alternative alphabet.
+
+ .. versionadded:: next
+
.. function:: a2b_qp(data, header=False)
Convert a block of quoted-printable data back to binary and return the binary
.. versionadded:: next
+.. data:: BASE32_ALPHABET
+
+ The Base 32 alphabet according to :rfc:`4648`.
+
+ .. versionadded:: next
+
+.. data:: BASE32HEX_ALPHABET
+
+ The "Extended Hex" Base 32 alphabet according to :rfc:`4648`.
+ Data encoded with this alphabet maintains its sort order during bitwise
+ comparisons.
+
+ .. versionadded:: next
+
.. seealso::
* Added the *ignorechars* parameter in :func:`~binascii.a2b_base64`.
(Contributed by Serhiy Storchaka in :gh:`144001`.)
+* Added functions for Base32 encoding:
+
+ - :func:`~binascii.b2a_base32` and :func:`~binascii.a2b_base32`
+
+ (Contributed by James Seo in :gh:`146192`.)
+
calendar
--------
two orders of magnitude less memory.
(Contributed by James Seo and Serhiy Storchaka in :gh:`101178`.)
+* Implementation for Base32 has been rewritten in C.
+  Encoding and decoding is now two orders of magnitude faster.
+  (Contributed by James Seo in :gh:`146192`.)
+
csv
---
the letter O). For security purposes the default is None, so that
0 and 1 are not allowed in the input.
'''
-_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
-_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
-_b32tab2 = {}
-_b32rev = {}
-
-def _b32encode(alphabet, s):
- # Delay the initialization of the table to not waste memory
- # if the function is never called
- if alphabet not in _b32tab2:
- b32tab = [bytes((i,)) for i in alphabet]
- _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
- b32tab = None
-
- if not isinstance(s, bytes_types):
- s = memoryview(s).tobytes()
- leftover = len(s) % 5
- # Pad the last quantum with zero bits if necessary
- if leftover:
- s = s + b'\0' * (5 - leftover) # Don't use += !
- encoded = bytearray()
- from_bytes = int.from_bytes
- b32tab2 = _b32tab2[alphabet]
- for i in range(0, len(s), 5):
- c = from_bytes(s[i: i + 5]) # big endian
- encoded += (b32tab2[c >> 30] + # bits 1 - 10
- b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
- b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
- b32tab2[c & 0x3ff] # bits 31 - 40
- )
- # Adjust for any leftover partial quanta
- if leftover == 1:
- encoded[-6:] = b'======'
- elif leftover == 2:
- encoded[-4:] = b'===='
- elif leftover == 3:
- encoded[-3:] = b'==='
- elif leftover == 4:
- encoded[-1:] = b'='
- return encoded.take_bytes()
-
-def _b32decode(alphabet, s, casefold=False, map01=None):
- # Delay the initialization of the table to not waste memory
- # if the function is never called
- if alphabet not in _b32rev:
- _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
+
+def b32encode(s):
+    # Thin wrapper: the encoding itself is done by binascii.b2a_base32,
+    # called without an explicit alphabet so its default is used.
+    return binascii.b2a_base32(s)
+# The public docstring is filled in from the shared base32 encoder template.
+b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
+
+def b32decode(s, casefold=False, map01=None):
s = _bytes_from_decode_data(s)
- if len(s) % 8:
- raise binascii.Error('Incorrect padding')
# Handle section 2.4 zero and one mapping. The flag map01 will be either
# False, or the character to map the digit 1 (one) to. It should be
# either L (el) or I (eye).
s = s.translate(bytes.maketrans(b'01', b'O' + map01))
if casefold:
s = s.upper()
- # Strip off pad characters from the right. We need to count the pad
- # characters because this will tell us how many null bytes to remove from
- # the end of the decoded string.
- l = len(s)
- s = s.rstrip(b'=')
- padchars = l - len(s)
- # Now decode the full quanta
- decoded = bytearray()
- b32rev = _b32rev[alphabet]
- for i in range(0, len(s), 8):
- quanta = s[i: i + 8]
- acc = 0
- try:
- for c in quanta:
- acc = (acc << 5) + b32rev[c]
- except KeyError:
- raise binascii.Error('Non-base32 digit found') from None
- decoded += acc.to_bytes(5) # big endian
- # Process the last, partial quanta
- if l % 8 or padchars not in {0, 1, 3, 4, 6}:
- raise binascii.Error('Incorrect padding')
- if padchars and decoded:
- acc <<= 5 * padchars
- last = acc.to_bytes(5) # big endian
- leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1
- decoded[-5:] = last[:leftover]
- return decoded.take_bytes()
-
-
-def b32encode(s):
- return _b32encode(_b32alphabet, s)
-b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
-
-def b32decode(s, casefold=False, map01=None):
- return _b32decode(_b32alphabet, s, casefold, map01)
+ return binascii.a2b_base32(s)
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
extra_args=_B32_DECODE_MAP01_DOCSTRING)
def b32hexencode(s):
- return _b32encode(_b32hexalphabet, s)
+ return binascii.b2a_base32(s, alphabet=binascii.BASE32HEX_ALPHABET)
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
def b32hexdecode(s, casefold=False):
+ s = _bytes_from_decode_data(s)
# base32hex does not have the 01 mapping
- return _b32decode(_b32hexalphabet, s, casefold)
+ if casefold:
+ s = s.upper()
+ return binascii.a2b_base32(s, alphabet=binascii.BASE32HEX_ALPHABET)
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
extra_args='')
# Note: "*_hex" functions are aliases for "(un)hexlify"
-b2a_functions = ['b2a_ascii85', 'b2a_base64', 'b2a_base85',
+b2a_functions = ['b2a_ascii85', 'b2a_base32', 'b2a_base64', 'b2a_base85',
'b2a_hex', 'b2a_qp', 'b2a_uu',
'hexlify']
-a2b_functions = ['a2b_ascii85', 'a2b_base64', 'a2b_base85',
+a2b_functions = ['a2b_ascii85', 'a2b_base32', 'a2b_base64', 'a2b_base85',
'a2b_hex', 'a2b_qp', 'a2b_uu',
'unhexlify']
all_functions = a2b_functions + b2a_functions + ['crc32', 'crc_hqx']
b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
b'.-:+=^!/*?&<>()[]{}@%$#')
+ self.assertEqual(binascii.BASE32_ALPHABET,
+ b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567')
+ self.assertEqual(binascii.BASE32HEX_ALPHABET,
+ b'0123456789ABCDEFGHIJKLMNOPQRSTUV')
+
def test_functions(self):
# Check presence of all functions
for name in all_functions:
with self.assertRaises(TypeError):
binascii.a2b_base64(data, alphabet=bytearray(alphabet))
+    def test_base32_valid(self):
+        # Round-trip rawdata through b2a_base32/a2b_base32.  The input is cut
+        # into chunks of length 0, 1, 2, ... so that the empty input and
+        # differently sized (padded and unpadded) tails are all encoded.
+        encoded_chunks = []
+        offset = 0
+        size = 0
+        while offset < len(self.rawdata):
+            chunk = self.type2test(self.rawdata[offset:offset + size])
+            encoded_chunks.append(binascii.b2a_base32(chunk))
+            offset += size
+            size += 1
+        # Decoding every chunk and concatenating must give back the input.
+        decoded = bytes()
+        for encoded in encoded_chunks:
+            decoded += binascii.a2b_base32(self.type2test(encoded))
+        self.assertEqual(decoded, self.rawdata)
+
+ def test_base32_errors(self):
+ def _fixPadding(data):
+ fixed = data.replace(b"=", b"")
+ len_8 = len(fixed) % 8
+ p = 8 - len_8 if len_8 else 0
+ return fixed + b"=" * p
+
+        def _assertRegexTemplate(assert_regex, data, good_padding_result=None):
+            # The malformed input must be rejected with a binascii.Error whose
+            # message matches assert_regex.
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base32(self.type2test(data))
+            # When an expected payload is given, the same data with its
+            # padding repaired must decode to it.  Compare against None
+            # explicitly: b"" is a legitimate expected result (e.g. input
+            # made only of pad characters) and must not skip the check.
+            if good_padding_result is not None:
+                fixed = self.type2test(_fixPadding(data))
+                self.assertEqual(binascii.a2b_base32(fixed), good_padding_result)
+
+ def assertNonBase32Data(*args):
+ _assertRegexTemplate(r"(?i)Only base32 data", *args)
+
+ def assertExcessData(*args):
+ _assertRegexTemplate(r"(?i)Excess data", *args)
+
+ def assertExcessPadding(*args):
+ _assertRegexTemplate(r"(?i)Excess padding", *args)
+
+ def assertLeadingPadding(*args):
+ _assertRegexTemplate(r"(?i)Leading padding", *args)
+
+ def assertIncorrectPadding(*args):
+ _assertRegexTemplate(r"(?i)Incorrect padding", *args)
+
+ def assertDiscontinuousPadding(*args):
+ _assertRegexTemplate(r"(?i)Discontinuous padding", *args)
+
+ def assertInvalidLength(*args):
+ _assertRegexTemplate(r"(?i)Invalid.+number of data characters", *args)
+
+ assertNonBase32Data(b"a")
+ assertNonBase32Data(b"AA-")
+ assertNonBase32Data(b"ABCDE==!")
+ assertNonBase32Data(b"ab:(){:|:&};:==")
+
+ assertExcessData(b"AB======C")
+ assertExcessData(b"AB======CD")
+ assertExcessData(b"ABCD====E")
+ assertExcessData(b"ABCDE===FGH")
+ assertExcessData(b"ABCDEFG=H")
+ assertExcessData(b"432Z====55555555")
+
+ assertExcessData(b"BE======EF", b"\t\x08")
+ assertExcessData(b"BEEF====C", b"\t\x08Q")
+ assertExcessData(b"BEEFC===AK", b"\t\x08Q\x01")
+ assertExcessData(b"BEEFCAK=E", b"\t\x08Q\x01D")
+
+ assertExcessPadding(b"BE=======", b"\t")
+ assertExcessPadding(b"BE========", b"\t")
+ assertExcessPadding(b"BEEF=====", b"\t\x08")
+ assertExcessPadding(b"BEEF======", b"\t\x08")
+ assertExcessPadding(b"BEEFC====", b"\t\x08Q")
+ assertExcessPadding(b"BEEFC=====", b"\t\x08Q")
+ assertExcessPadding(b"BEEFCAK==", b"\t\x08Q\x01")
+ assertExcessPadding(b"BEEFCAK===", b"\t\x08Q\x01")
+ assertExcessPadding(b"BEEFCAKE=", b"\t\x08Q\x01D")
+ assertExcessPadding(b"BEEFCAKE==", b"\t\x08Q\x01D")
+ assertExcessPadding(b"BEEFCAKE===", b"\t\x08Q\x01D")
+ assertExcessPadding(b"BEEFCAKE====", b"\t\x08Q\x01D")
+ assertExcessPadding(b"BEEFCAKE=====", b"\t\x08Q\x01D")
+ assertExcessPadding(b"BEEFCAKE======", b"\t\x08Q\x01D")
+ assertExcessPadding(b"BEEFCAKE=======", b"\t\x08Q\x01D")
+ assertExcessPadding(b"BEEFCAKE========", b"\t\x08Q\x01D")
+ assertExcessPadding(b"BEEFCAKE=========", b"\t\x08Q\x01D")
+
+ assertLeadingPadding(b"=", b"")
+ assertLeadingPadding(b"==", b"")
+ assertLeadingPadding(b"===", b"")
+ assertLeadingPadding(b"====", b"")
+ assertLeadingPadding(b"=====", b"")
+ assertLeadingPadding(b"======", b"")
+ assertLeadingPadding(b"=======", b"")
+ assertLeadingPadding(b"========", b"")
+ assertLeadingPadding(b"=========", b"")
+ assertLeadingPadding(b"=BEEFCAKE", b"\t\x08Q\x01D")
+ assertLeadingPadding(b"==BEEFCAKE", b"\t\x08Q\x01D")
+ assertLeadingPadding(b"===BEEFCAKE", b"\t\x08Q\x01D")
+ assertLeadingPadding(b"====BEEFCAKE", b"\t\x08Q\x01D")
+ assertLeadingPadding(b"=====BEEFCAKE", b"\t\x08Q\x01D")
+ assertLeadingPadding(b"======BEEFCAKE", b"\t\x08Q\x01D")
+ assertLeadingPadding(b"=======BEEFCAKE", b"\t\x08Q\x01D")
+ assertLeadingPadding(b"========BEEFCAKE", b"\t\x08Q\x01D")
+ assertLeadingPadding(b"=========BEEFCAKE", b"\t\x08Q\x01D")
+
+ assertIncorrectPadding(b"AB")
+ assertIncorrectPadding(b"ABCD")
+ assertIncorrectPadding(b"ABCDE")
+ assertIncorrectPadding(b"ABCDEFG")
+
+ assertIncorrectPadding(b"BE=", b"\t")
+ assertIncorrectPadding(b"BE==", b"\t")
+ assertIncorrectPadding(b"BE===", b"\t")
+ assertIncorrectPadding(b"BE====", b"\t")
+ assertIncorrectPadding(b"BE=====", b"\t")
+ assertIncorrectPadding(b"BEEF=", b"\t\x08")
+ assertIncorrectPadding(b"BEEF==", b"\t\x08")
+ assertIncorrectPadding(b"BEEF===", b"\t\x08")
+ assertIncorrectPadding(b"BEEFC=", b"\t\x08Q")
+ assertIncorrectPadding(b"BEEFC==", b"\t\x08Q")
+
+ assertDiscontinuousPadding(b"BE=EF===", b"\t\x08")
+ assertDiscontinuousPadding(b"BE==EF==", b"\t\x08")
+ assertDiscontinuousPadding(b"BEEF=C==", b"\t\x08Q")
+ assertDiscontinuousPadding(b"BEEFC=AK", b"\t\x08Q\x01")
+
+ assertInvalidLength(b"A")
+ assertInvalidLength(b"ABC")
+ assertInvalidLength(b"ABCDEF")
+
+ assertInvalidLength(b"A=")
+ assertInvalidLength(b"A==")
+ assertInvalidLength(b"A===")
+ assertInvalidLength(b"A====")
+ assertInvalidLength(b"A=====")
+ assertInvalidLength(b"A======")
+ assertInvalidLength(b"ABC=")
+ assertInvalidLength(b"ABC==")
+ assertInvalidLength(b"ABC===")
+ assertInvalidLength(b"ABC====")
+ assertInvalidLength(b"ABCDEF=")
+
+ assertInvalidLength(b"B=E=====", b"\t")
+ assertInvalidLength(b"B==E====", b"\t")
+ assertInvalidLength(b"BEE=F===", b"\t\x08")
+ assertInvalidLength(b"BEE==F==", b"\t\x08")
+ assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01")
+ assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01")
+
+    def test_base32_alphabet(self):
+        # Encoding with a custom alphabet must equal the default encoding
+        # with every symbol translated, and must decode back to the input.
+        alphabet = b'0Aa1Bb2Cc3Dd4Ee5Ff6Gg7Hh8Ii9JjKk'
+        data = self.type2test(self.rawdata)
+        encoded = binascii.b2a_base32(data, alphabet=alphabet)
+        to_custom = bytes.maketrans(binascii.BASE32_ALPHABET, alphabet)
+        expected = binascii.b2a_base32(data).translate(to_custom)
+        self.assertEqual(encoded, expected)
+        self.assertEqual(binascii.a2b_base32(encoded, alphabet=alphabet),
+                         self.rawdata)
+        # The encoder also accepts the alphabet as any tested buffer type.
+        self.assertEqual(
+            binascii.b2a_base32(data, alphabet=self.type2test(alphabet)),
+            expected)
+
+        # Empty input stays empty in both directions.
+        data = self.type2test(b'')
+        self.assertEqual(binascii.b2a_base32(data, alphabet=alphabet), b'')
+        self.assertEqual(binascii.a2b_base32(data, alphabet=alphabet), b'')
+
+        # Wrong alphabet type -> TypeError, wrong alphabet length -> ValueError.
+        rejected = (
+            (TypeError, None),
+            (TypeError, alphabet.decode()),
+            (ValueError, alphabet[:-1]),
+            (ValueError, alphabet+b'?'),
+        )
+        for func in binascii.b2a_base32, binascii.a2b_base32:
+            for exc_type, bad_alphabet in rejected:
+                with self.assertRaises(exc_type):
+                    func(data, alphabet=bad_alphabet)
+        # The decoder only takes a real bytes object as alphabet.
+        with self.assertRaises(TypeError):
+            binascii.a2b_base32(data, alphabet=bytearray(alphabet))
+
def test_uu(self):
MAX_UU = 45
for backtick in (True, False):
--- /dev/null
+Add Base32 support to :mod:`binascii` and improve the performance of the
+Base32 converters in :mod:`base64`. Patch by James Seo.
#define BASE85_A85_Z 0x00000000
#define BASE85_A85_Y 0x20202020
+
+/*
+ * Reverse lookup table for the default base32 alphabet, indexed by the
+ * input byte: 'A'-'Z' give 0-25 and '2'-'7' give 26-31.  Every other byte
+ * is -1, which is stored as 255 in an unsigned char and is therefore
+ * rejected by the decoders' range checks (v >= 32, or any of bits 0xe0 set).
+ * Lowercase letters are deliberately not mapped.
+ */
+static const unsigned char table_a2b_base32[] Py_ALIGNED(64) = {
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,26,27, 28,29,30,31, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
+    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+};
+
+/* Forward table: the symbol for each 5-bit value 0-31 (default alphabet). */
+static const unsigned char table_b2a_base32[] Py_ALIGNED(64) =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
+
+#define BASE32_PAD '='
+
+/*
+ * Fast base32 encoding/decoding helpers.
+ *
+ * Analogous to the helpers for base64.
+ */
+
+/*
+ * Encode one 5-byte group as 8 base32 characters.
+ *
+ * The five bytes at `in` are packed big-endian into a 40-bit value, which is
+ * then written to out[0..7] as eight 5-bit indices into `table`, most
+ * significant index first.
+ */
+static inline void
+base32_encode_quint(const unsigned char *in, unsigned char *out,
+                    const unsigned char table[])
+{
+    uint64_t bits = 0;
+    for (int i = 0; i < 5; i++) {
+        bits = (bits << 8) | in[i];
+    }
+    /* Peel 5-bit symbols off the low end, filling the output back to front. */
+    for (int k = 7; k >= 0; k--) {
+        out[k] = table[bits & 0x1f];
+        bits >>= 5;
+    }
+}
+
+/*
+ * Encode every complete 5-byte group found in the first in_len bytes of `in`,
+ * writing 8 characters per group to `out`.  A trailing partial group is left
+ * for the caller.
+ * Returns the number of input bytes consumed (always a multiple of 5).
+ */
+static inline Py_ssize_t
+base32_encode_fast(const unsigned char *in, Py_ssize_t in_len,
+                   unsigned char *out, const unsigned char table[])
+{
+    const Py_ssize_t full_groups = in_len / 5;
+
+    for (Py_ssize_t g = 0; g < full_groups; g++) {
+        base32_encode_quint(in + g * 5, out + g * 8, table);
+    }
+
+    return full_groups * 5;
+}
+
+/*
+ * Decode one group of 8 base32 characters into 5 bytes.
+ *
+ * Each input byte is looked up in `table`; a looked-up value with any bit
+ * above the low five set (i.e. >= 32) marks an invalid character.
+ * Returns 1 on success.  Returns 0, without writing to `out`, if any of the
+ * eight characters is invalid.
+ */
+static inline int
+base32_decode_octa(const unsigned char *in, unsigned char *out,
+                   const unsigned char table[])
+{
+    uint64_t bits = 0;
+    unsigned char seen = 0;
+
+    for (int i = 0; i < 8; i++) {
+        unsigned char v = table[in[i]];
+        seen |= v;
+        bits = (bits << 5) | v;
+    }
+
+    /* Validate all eight lookups at once before touching the output. */
+    if (seen & 0xe0) {
+        return 0;
+    }
+
+    /* `bits` now holds exactly 40 bits; emit them big-endian. */
+    out[0] = (unsigned char)(bits >> 32);
+    out[1] = (unsigned char)(bits >> 24);
+    out[2] = (unsigned char)(bits >> 16);
+    out[3] = (unsigned char)(bits >> 8);
+    out[4] = (unsigned char)bits;
+    return 1;
+}
+
+/*
+ * Decode as many leading complete 8-character groups as possible, writing
+ * 5 bytes per group to `out`.  Groups containing padding are not accepted
+ * here.
+ * Decoding stops at the first group with an invalid character or padding,
+ * or when fewer than 8 characters remain.
+ * Returns the number of input characters consumed (a multiple of 8).
+ */
+static inline Py_ssize_t
+base32_decode_fast(const unsigned char *in, Py_ssize_t in_len,
+                   unsigned char *out, const unsigned char table[])
+{
+    const Py_ssize_t n_octas = in_len / 8;
+    Py_ssize_t done = 0;
+
+    while (done < n_octas
+           && base32_decode_octa(in + done * 8, out + done * 5, table))
+    {
+        done++;
+    }
+
+    return done * 8;
+}
+
+
static const unsigned short crctab_hqx[256] = {
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
return PyBytesWriter_FinishWithPointer(writer, ascii_data);
}
+/*[clinic input]
+binascii.a2b_base32
+
+ data: ascii_buffer
+ /
+ *
+ alphabet: PyBytesObject(c_default="NULL") = BASE32_ALPHABET
+
+Decode a line of base32 data.
+[clinic start generated code]*/
+
+static PyObject *
+binascii_a2b_base32_impl(PyObject *module, Py_buffer *data,
+                         PyBytesObject *alphabet)
+/*[clinic end generated code: output=12cb58bf547237e2 input=426055ea49ac147e]*/
+{
+    /*
+     * Decode the base32 text in `data` and return the result as a new bytes
+     * object.  `alphabet` is NULL for the default alphabet, otherwise a
+     * bytes object for which a reverse lookup table is requested.
+     *
+     * On malformed input the module's Error exception is set and NULL is
+     * returned.  NULL is also returned when the reverse table or the output
+     * writer cannot be created.
+     *
+     * Whole, unpadded groups of 8 characters are decoded by
+     * base32_decode_fast(); whatever it does not consume (the final padded
+     * group, or the first group containing an invalid character) goes
+     * through the character-by-character loop below, which also produces
+     * the specific error messages.
+     */
+    const unsigned char *ascii_data = data->buf;
+    Py_ssize_t ascii_len = data->len;
+    binascii_state *state = NULL;
+    PyObject *table_obj = NULL;
+    const unsigned char *table_a2b = table_a2b_base32;
+
+    assert(ascii_len >= 0);
+
+    if (alphabet != NULL) {
+        /* NOTE(review): get_reverse_table() is defined outside this view;
+         * presumably it validates the alphabet and returns a new reference
+         * to a 256-entry bytes lookup table -- confirm.  The reference is
+         * released on every exit path below. */
+        state = get_binascii_state(module);
+        table_obj = get_reverse_table(state, (PyObject *)alphabet, 32, BASE32_PAD);
+        if (table_obj == NULL) {
+            return NULL;
+        }
+        table_a2b = (const unsigned char *)PyBytes_AS_STRING(table_obj);
+    }
+
+    /* Allocate output buffer. */
+    /* Upper bound: 5 bytes for every (possibly partial) group of 8 input
+     * characters.  The real length is fixed at the end from bin_data. */
+    size_t bin_len = ((size_t)ascii_len + 7) / 8 * 5;
+    PyBytesWriter *writer = PyBytesWriter_Create(bin_len);
+    if (writer == NULL) {
+        Py_XDECREF(table_obj);
+        return NULL;
+    }
+    unsigned char *bin_data = PyBytesWriter_GetData(writer);
+
+    /*
+     * Fast path: use optimized decoder for complete octas (groups of 8 bytes).
+     * The fast path stops at padding, invalid chars, or incomplete octas.
+     */
+    if (ascii_len >= 8) {
+        Py_ssize_t fast_chars = base32_decode_fast(ascii_data, ascii_len,
+                                                   bin_data, table_a2b);
+        if (fast_chars > 0) {
+            ascii_data += fast_chars;
+            ascii_len -= fast_chars;
+            bin_data += (fast_chars / 8) * 5;
+        }
+    }
+
+    /* Slow path: handle remaining input (padding, invalid chars, incomplete octas). */
+    /* leftchar: bits of the current group not yet written to the output.
+     * octa_pos: number of data characters seen in the current group (0-7).
+     * pads:     number of pad characters seen so far. */
+    unsigned char leftchar = 0;
+    int octa_pos = 0;
+    int pads = 0;
+    for (; ascii_len; ascii_len--, ascii_data++) {
+        unsigned char this_ch = *ascii_data;
+
+        /* Check for pad sequences. They may only occur at certain positions. */
+        if (this_ch == BASE32_PAD) {
+            pads++;
+
+            /* After 2, 4, 5 or 7 data characters a whole number of bytes
+             * (1, 2, 3 or 4) has been emitted, so padding may start there,
+             * as long as it does not run past the end of the group. */
+            if ((octa_pos == 2 || octa_pos == 4 || octa_pos == 5 || octa_pos == 7)
+                && octa_pos + pads <= 8)
+            {
+                continue;
+            }
+            /* 1, 3 or 6 data characters can never form whole bytes; this is
+             * reported by the "number of data characters" check after the
+             * loop. */
+            if (octa_pos == 1 || octa_pos == 3 || octa_pos == 6) {
+                /* Set an error below. */
+                break;
+            }
+            /* Remaining cases: a pad at the start of a group, or more pads
+             * than the group has room for.  "Leading" is reported only when
+             * the pad is the very first byte of the whole input. */
+            state = get_binascii_state(module);
+            if (state) {
+                PyErr_SetString(state->Error,
+                                (octa_pos == 0 && ascii_data == data->buf)
+                                ? "Leading padding not allowed"
+                                : "Excess padding not allowed");
+            }
+            goto error;
+        }
+
+        /* Values of 32 and above mark bytes outside the alphabet. */
+        unsigned char v = table_a2b[this_ch];
+        if (v >= 32) {
+            state = get_binascii_state(module);
+            if (state) {
+                PyErr_SetString(state->Error, "Only base32 data is allowed");
+            }
+            goto error;
+        }
+
+        /* Data in the middle of/after the padding is not allowed. */
+        /* If the pads already completed the group, the data follows the
+         * padding; otherwise it interrupts it. */
+        if (pads) {
+            state = get_binascii_state(module);
+            if (state) {
+                PyErr_SetString(state->Error, (octa_pos + pads == 8)
+                                ? "Excess data after padding"
+                                : "Discontinuous padding not allowed");
+            }
+            goto error;
+        }
+
+        /* Accumulate 5 bits per character; an output byte is written each
+         * time 8 or more bits are available, and the unwritten low bits are
+         * carried over in leftchar. */
+        switch (octa_pos) {
+        case 0:
+            octa_pos = 1;
+            leftchar = v;
+            break;
+        case 1:
+            octa_pos = 2;
+            *bin_data++ = (leftchar << 3) | (v >> 2);
+            leftchar = v & 0x03;
+            break;
+        case 2:
+            octa_pos = 3;
+            leftchar = (leftchar << 5) | v;
+            break;
+        case 3:
+            octa_pos = 4;
+            *bin_data++ = (leftchar << 1) | (v >> 4);
+            leftchar = v & 0x0f;
+            break;
+        case 4:
+            octa_pos = 5;
+            *bin_data++ = (leftchar << 4) | (v >> 1);
+            leftchar = v & 0x01;
+            break;
+        case 5:
+            octa_pos = 6;
+            leftchar = (leftchar << 5) | v;
+            break;
+        case 6:
+            octa_pos = 7;
+            *bin_data++ = (leftchar << 2) | (v >> 3);
+            leftchar = v & 0x07;
+            break;
+        case 7:
+            /* Group complete: start over with the next one. */
+            octa_pos = 0;
+            *bin_data++ = (leftchar << 5) | v;
+            leftchar = 0;
+        }
+    }
+
+    /* Reached both at end of input and via the `break` above.  The reported
+     * count is the offset of ascii_data from the start of the input. */
+    if (octa_pos == 1 || octa_pos == 3 || octa_pos == 6) {
+        state = get_binascii_state(module);
+        if (state) {
+            const unsigned char *ascii_data_start = data->buf;
+            PyErr_Format(state->Error,
+                         "Invalid base32-encoded string: "
+                         "number of data characters (%zd) "
+                         "cannot be 1, 3, or 6 more than a multiple of 8",
+                         ascii_data - ascii_data_start);
+        }
+        goto error;
+    }
+
+    /* A partial final group must be padded to exactly 8 characters, and a
+     * complete one must not be followed by any padding. */
+    if ((octa_pos != 0 && octa_pos + pads != 8)
+        || (octa_pos == 0 && pads != 0))
+    {
+        state = get_binascii_state(module);
+        if (state) {
+            PyErr_SetString(state->Error, "Incorrect padding");
+        }
+        goto error;
+    }
+
+    Py_XDECREF(table_obj);
+    /* bin_data points one past the last byte written. */
+    return PyBytesWriter_FinishWithPointer(writer, bin_data);
+
+error:
+    PyBytesWriter_Discard(writer);
+    Py_XDECREF(table_obj);
+    return NULL;
+}
+
+/*[clinic input]
+binascii.b2a_base32
+
+ data: Py_buffer
+ /
+ *
+ alphabet: Py_buffer(c_default="{NULL, NULL}") = BASE32_ALPHABET
+
+Base32-code line of data.
+[clinic start generated code]*/
+
+static PyObject *
+binascii_b2a_base32_impl(PyObject *module, Py_buffer *data,
+                         Py_buffer *alphabet)
+/*[clinic end generated code: output=058d0d1aeb014d3b input=99cbe7194799d368]*/
+{
+    /*
+     * Encode the bytes in `data` as base32 and return them as a new bytes
+     * object whose length is a multiple of 8 (the last group is padded with
+     * BASE32_PAD).  `alphabet->buf` is NULL for the default alphabet;
+     * otherwise it must hold exactly 32 bytes or ValueError is raised.
+     * The module's Error exception is raised if the output would not fit
+     * in a Py_ssize_t.
+     */
+    const unsigned char *src = data->buf;
+    Py_ssize_t remaining = data->len;
+    const unsigned char *symbols = table_b2a_base32;
+
+    assert(remaining >= 0);
+
+    /* A caller-supplied alphabet replaces the built-in symbol table. */
+    if (alphabet->buf != NULL) {
+        if (alphabet->len != 32) {
+            PyErr_SetString(PyExc_ValueError, "alphabet must have length 32");
+            return NULL;
+        }
+        symbols = alphabet->buf;
+    }
+
+    /*
+     * Output size: 8 characters for every started group of 5 input bytes.
+     * The computation is done in size_t so it cannot overflow a signed type.
+     */
+    size_t out_len = ((size_t)remaining + 4u) / 5u * 8u;
+    if (out_len > PY_SSIZE_T_MAX) {
+        binascii_state *state = get_binascii_state(module);
+        if (state) {
+            PyErr_SetString(state->Error, "Too much data for base32");
+        }
+        return NULL;
+    }
+    PyBytesWriter *writer = PyBytesWriter_Create(out_len);
+    if (writer == NULL) {
+        return NULL;
+    }
+    unsigned char *dst = PyBytesWriter_GetData(writer);
+
+    /* Bulk-encode all complete 5-byte groups. */
+    Py_ssize_t consumed = base32_encode_fast(src, remaining, dst, symbols);
+    src += consumed;
+    dst += (consumed / 5) * 8;
+    remaining -= consumed;
+
+    /*
+     * Final partial group of 1-4 bytes: left-align the bytes in a 40-bit
+     * value (missing low bits are zero), emit one symbol for every started
+     * 5-bit field (2, 4, 5 or 7 symbols) and pad the group to 8 characters.
+     */
+    if (remaining > 0) {
+        uint64_t bits = 0;
+        for (int i = 0; i < (int)remaining; i++) {
+            bits |= (uint64_t)src[i] << (32 - 8 * i);
+        }
+        const int n_symbols = ((int)remaining * 8 + 4) / 5;
+        for (int k = 0; k < 8; k++) {
+            if (k < n_symbols) {
+                *dst++ = symbols[(bits >> (35 - 5 * k)) & 0x1f];
+            }
+            else {
+                *dst++ = BASE32_PAD;
+            }
+        }
+    }
+
+    return PyBytesWriter_FinishWithPointer(writer, dst);
+}
+
/*[clinic input]
binascii.crc_hqx
BINASCII_A2B_ASCII85_METHODDEF
BINASCII_A2B_BASE85_METHODDEF
BINASCII_B2A_BASE85_METHODDEF
+ BINASCII_A2B_BASE32_METHODDEF
+ BINASCII_B2A_BASE32_METHODDEF
BINASCII_A2B_HEX_METHODDEF
BINASCII_B2A_HEX_METHODDEF
BINASCII_HEXLIFY_METHODDEF
{
return -1;
}
+ if (PyModule_Add(module, "BASE32_ALPHABET",
+ PyBytes_FromStringAndSize((const char *)table_b2a_base32, 32)) < 0)
+ {
+ return -1;
+ }
+ if (PyModule_Add(module, "BASE32HEX_ALPHABET",
+ PyBytes_FromString("0123456789ABCDEFGHIJKLMNOPQRSTUV")) < 0)
+ {
+ return -1;
+ }
state->reverse_table_cache = PyDict_New();
if (state->reverse_table_cache == NULL) {
return return_value;
}
+PyDoc_STRVAR(binascii_a2b_base32__doc__,
+"a2b_base32($module, data, /, *, alphabet=BASE32_ALPHABET)\n"
+"--\n"
+"\n"
+"Decode a line of base32 data.");
+
+#define BINASCII_A2B_BASE32_METHODDEF \
+ {"a2b_base32", _PyCFunction_CAST(binascii_a2b_base32), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base32__doc__},
+
+static PyObject *
+binascii_a2b_base32_impl(PyObject *module, Py_buffer *data,
+ PyBytesObject *alphabet);
+
+static PyObject *
+binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(alphabet), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"", "alphabet", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "a2b_base32",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+ Py_buffer data = {NULL, NULL};
+ PyBytesObject *alphabet = NULL;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (!ascii_buffer_converter(args[0], &data)) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_kwonly;
+ }
+ if (!PyBytes_Check(args[1])) {
+ _PyArg_BadArgument("a2b_base32", "argument 'alphabet'", "bytes", args[1]);
+ goto exit;
+ }
+ alphabet = (PyBytesObject *)args[1];
+skip_optional_kwonly:
+ return_value = binascii_a2b_base32_impl(module, &data, alphabet);
+
+exit:
+ /* Cleanup for data */
+ if (data.obj)
+ PyBuffer_Release(&data);
+
+ return return_value;
+}
+
+PyDoc_STRVAR(binascii_b2a_base32__doc__,
+"b2a_base32($module, data, /, *, alphabet=BASE32_ALPHABET)\n"
+"--\n"
+"\n"
+"Base32-code line of data.");
+
+#define BINASCII_B2A_BASE32_METHODDEF \
+ {"b2a_base32", _PyCFunction_CAST(binascii_b2a_base32), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base32__doc__},
+
+static PyObject *
+binascii_b2a_base32_impl(PyObject *module, Py_buffer *data,
+ Py_buffer *alphabet);
+
+static PyObject *
+binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(alphabet), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"", "alphabet", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "b2a_base32",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+ Py_buffer data = {NULL, NULL};
+ Py_buffer alphabet = {NULL, NULL};
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_kwonly;
+ }
+ if (PyObject_GetBuffer(args[1], &alphabet, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+skip_optional_kwonly:
+ return_value = binascii_b2a_base32_impl(module, &data, &alphabet);
+
+exit:
+ /* Cleanup for data */
+ if (data.obj) {
+ PyBuffer_Release(&data);
+ }
+ /* Cleanup for alphabet */
+ if (alphabet.obj) {
+ PyBuffer_Release(&alphabet);
+ }
+
+ return return_value;
+}
+
PyDoc_STRVAR(binascii_crc_hqx__doc__,
"crc_hqx($module, data, crc, /)\n"
"--\n"
return return_value;
}
-/*[clinic end generated code: output=84c97096b0fb3819 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=242c0c56b918bd33 input=a9049054013a1b77]*/