arm arm/neon arm/v6 arm/fat \
arm64 arm64/crypto arm64/fat \
powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \
- s390x s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \
+ s390x s390x/vf s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \
mkdir "$(distdir)/$$d" ; \
find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
-exec cp '{}' "$(distdir)/$$d" ';' ; \
AC_HELP_STRING([--enable-power-altivec], [Enable POWER altivec and vsx extensions. (default=no)]),,
[enable_altivec=no])
+AC_ARG_ENABLE(s390x-vf,
+ AC_HELP_STRING([--enable-s390x-vf], [Enable vector facility on z/Architecture. (default=no)]),,
+ [enable_s390x_vf=no])
+
AC_ARG_ENABLE(s390x-msa,
AC_HELP_STRING([--enable-s390x-msa], [Enable message-security assist extensions on z/Architecture. (default=no)]),,
[enable_s390x_msa=no])
if test "x$enable_fat" = xyes ; then
asm_path="s390x/fat $asm_path"
OPT_NETTLE_SOURCES="fat-s390x.c $OPT_NETTLE_SOURCES"
- FAT_TEST_LIST="none msa_x1 msa_x2 msa_x4"
+ FAT_TEST_LIST="none vf msa_x1 msa_x2 msa_x4"
else
+ if test "$enable_s390x_vf" = yes ; then
+ asm_path="s390x/vf $asm_path"
+ fi
if test "$enable_s390x_msa" = yes ; then
asm_path="s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 $asm_path"
fi
# Assembler files which generate additional object files if they are used.
asm_nettle_optional_list="gcm-hash.asm gcm-hash8.asm cpuid.asm cpu-facility.asm \
- aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm \
+ aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm memxor3-2.asm \
aes128-set-encrypt-key-2.asm aes128-set-decrypt-key-2.asm \
aes128-encrypt-2.asm aes128-decrypt-2.asm \
aes192-set-encrypt-key-2.asm aes192-set-decrypt-key-2.asm \
AH_VERBATIM([HAVE_NATIVE],
[/* Define to 1 each of the following for which a native (ie. CPU specific)
implementation of the corresponding routine exists. */
+#undef HAVE_NATIVE_memxor3
#undef HAVE_NATIVE_aes_decrypt
#undef HAVE_NATIVE_aes_encrypt
#undef HAVE_NATIVE_aes128_decrypt
#include "nettle-types.h"
+#include "memxor.h"
#include "aes.h"
#include "gcm.h"
#include "gcm-internal.h"
#endif
/* Facility bits */
+#define FAC_VF 129 /* vector facility */
#define FAC_MSA 17 /* message-security assist */
#define FAC_MSA_X4 77 /* message-security-assist extension 4 */
struct s390x_features
{
+ int have_vector_facility;
int have_km_aes128;
int have_km_aes192;
int have_km_aes256;
static void
get_s390x_features (struct s390x_features *features)
{
+ features->have_vector_facility = 0;
features->have_km_aes128 = 0;
features->have_km_aes192 = 0;
features->have_km_aes256 = 0;
const char *sep = strchr (s, ',');
size_t length = sep ? (size_t) (sep - s) : strlen(s);
- if (MATCH (s, length, "msa_x1", 6))
+ if (MATCH (s, length, "vf", 2))
+ features->have_vector_facility = 1;
+ else if (MATCH (s, length, "msa_x1", 6))
{
features->have_km_aes128 = 1;
}
uint64_t facilities[FACILITY_DOUBLEWORDS_MAX] = {0};
_nettle_stfle(facilities, FACILITY_DOUBLEWORDS_MAX);
+ if (facilities[FACILITY_INDEX(FAC_VF)] & FACILITY_BIT(FAC_VF))
+ features->have_vector_facility = 1;
+
if (facilities[FACILITY_INDEX(FAC_MSA)] & FACILITY_BIT(FAC_MSA))
{
uint64_t query_status[2] = {0};
}
}
+/* MEMXOR3 */
+DECLARE_FAT_FUNC(nettle_memxor3, memxor3_func)
+DECLARE_FAT_FUNC_VAR(memxor3, memxor3_func, c)
+DECLARE_FAT_FUNC_VAR(memxor3, memxor3_func, s390x)
+
/* AES128 */
DECLARE_FAT_FUNC(nettle_aes128_set_encrypt_key, aes128_set_key_func)
DECLARE_FAT_FUNC_VAR(aes128_set_encrypt_key, aes128_set_key_func, c)
get_s390x_features (&features);
verbose = getenv (ENV_VERBOSE) != NULL;
+ /* MEMXOR3 */
+ if (features.have_vector_facility)
+ {
+ if (verbose)
+ fprintf (stderr, "libnettle: enabling vectorized memxor3.\n");
+ nettle_memxor3_vec = _nettle_memxor3_s390x;
+ }
+ else
+ {
+ nettle_memxor3_vec = _nettle_memxor3_c;
+ }
+
/* AES128 */
if (features.have_km_aes128)
{
}
}
+/* MEMXOR3 */
+DEFINE_FAT_FUNC(nettle_memxor3, void *,
+ (void *dst_in, const void *a_in, const void *b_in, size_t n),
+ (dst_in, a_in, b_in, n))
+
/* AES128 */
DEFINE_FAT_FUNC(nettle_aes128_set_encrypt_key, void,
(struct aes128_ctx *ctx, const uint8_t *key),
size_t length, const uint8_t *data);
typedef void *(memxor_func)(void *dst, const void *src, size_t n);
+typedef void *(memxor3_func)(void *dst_in, const void *a_in, const void *b_in, size_t n);
typedef void salsa20_core_func (uint32_t *dst, const uint32_t *src, unsigned rounds);
typedef void salsa20_crypt_func (struct salsa20_ctx *ctx, unsigned rounds,
#define WORD_T_THRESH 16
+/* For fat builds: when HAVE_NATIVE_memxor3 is set, the macro below
+   renames the C implementation of nettle_memxor3 defined later in this
+   file to _nettle_memxor3_c, and the prototype declares it under that
+   name. NOTE(review): presumably this keeps the nettle_memxor3 symbol
+   free for the fat dispatcher and lets the dispatcher pick this C
+   version as its fallback -- confirm against the fat-*.c sources. */
+#if HAVE_NATIVE_memxor3
+void *
+_nettle_memxor3_c(void *dst_in, const void *a_in, const void *b_in, size_t n);
+# define nettle_memxor3 _nettle_memxor3_c
+#endif
+
/* XOR word-aligned areas. n is the number of words, not bytes. */
static void
memxor3_common_alignment (word_t *dst,
internally by cbc decrypt, and it is not advertised or documented
to nettle users. */
void *
-memxor3(void *dst_in, const void *a_in, const void *b_in, size_t n)
+nettle_memxor3(void *dst_in, const void *a_in,
+ const void *b_in, size_t n)
{
unsigned char *dst = dst_in;
const unsigned char *a = a_in;
--- /dev/null
+C s390x/fat/memxor3-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_memxor3) picked up by configure
+
+define(`fat_transform', `_$1_s390x')
+include_src(`s390x/vf/memxor3.asm')
--- /dev/null
+C s390x/vf/memxor3.asm
+
+ifelse(`
+ Copyright (C) 2020 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+.file "memxor3.asm"
+
+.text
+
+C void * memxor3(void *dst_in, const void *a_in, const void *b_in, size_t n)
+
+PROLOGUE(nettle_memxor3)
+C XOR two source areas into a destination, working from the end of the
+C buffers towards the start: first the n mod 16 tail bytes, then up to
+C three 16-byte blocks, then the remaining 64-byte blocks.
+C NOTE(review): register roles assume the s390x ELF ABI argument order
+C for the prototype above (r2 = dst_in, r3 = a_in, r4 = b_in, r5 = n);
+C confirm against the ABI supplement.
+ agr %r2,%r5 C r2 += n
+ agr %r3,%r5 C r3 += n
+ agr %r4,%r5 C r4 += n
+Lmod:
+ risbg %r1,%r5,60,191,0 C r1 = n & 15 (bits 60-63 kept, rest zeroed)
+ jz L1x C no partial block
+ sgr %r3,%r1 C step all three pointers back by the tail length
+ sgr %r4,%r1
+ sgr %r2,%r1
+ aghi %r1,-1 C highest index
+ vll %v24,%r1,0(%r3) C load bytes 0..r1 from each source
+ vll %v28,%r1,0(%r4)
+ vx %v24,%v24,%v28
+ vstl %v24,%r1,0(%r2) C store bytes 0..r1 of the result
+L1x:
+ risbg %r1,%r5,58,187,0 C r1 = n & 0x30 (bits 58-59 kept, rest zeroed)
+ jz L4x C no single 16-byte blocks
+ srlg %r1,%r1,4 C 1-block loop count 'n / 16'
+L1x_loop:
+ aghi %r3,-16 C pre-decrement: pointers move towards the start
+ aghi %r4,-16
+ aghi %r2,-16
+ vl %v24,0(%r3),0
+ vl %v28,0(%r4),0
+ vx %v24,%v24,%v28
+ vst %v24,0(%r2),0
+ brctg %r1,L1x_loop C decrement r1, loop while non-zero
+L4x:
+ risbg %r1,%r5,0,185,0 C r1 = n with the low 6 bits cleared (bits 0-57 kept)
+ jz Ldone C fewer than 64 bytes remain
+ srlg %r1,%r1,6 C 4-blocks loop count 'n / (16 * 4)'
+L4x_loop:
+ aghi %r3,-64 C pre-decrement by four 16-byte blocks
+ aghi %r4,-64
+ aghi %r2,-64
+ vlm %v24,%v27,0(%r3),0 C v24..v27 = 64 bytes from r3
+ vlm %v28,%v31,0(%r4),0 C v28..v31 = 64 bytes from r4
+ vx %v24,%v24,%v28
+ vx %v25,%v25,%v29
+ vx %v26,%v26,%v30
+ vx %v27,%v27,%v31
+ vstm %v24,%v27,0(%r2),0
+ brctg %r1,L4x_loop C decrement r1, loop while non-zero
+Ldone:
+C r2 is back at its entry value here: n was added at the top and
+C (n & 15) + (n & 0x30) + (n with low 6 bits cleared) = n was subtracted.
+C RA is defined outside this file (presumably the return-address register).
+ br RA
+EPILOGUE(nettle_memxor3)