2013-04-16  Niels Möller  <nisse@lysator.liu.se>
+ * asm.m4 (m4_log2): New macro, similar to the one in GMP.
+ (ALIGN): Changed to take alignment in bytes. Updated all callers;
+ ALIGN is currently used only in x86 and x86_64 files.
+
* umac.h (umac32_ctx, umac64_ctx, umac96_ctx, umac128_ctx): Make
block count a uint64_t. Reorder some elements to put short values
together.
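
A note on the umac.h entry: making the block count a uint64_t and moving
the short members next to each other keeps alignment padding down. A
minimal C sketch of the effect on a 64-bit ABI, with hypothetical member
names rather than the real umac32_ctx fields:

  #include <stdint.h>

  /* Hypothetical layouts, for illustration only.  Interleaving short
     and 8-byte members forces padding before each uint64_t: */
  struct scattered
  {
    uint16_t pad_length;   /* 2 bytes, then 6 bytes padding */
    uint64_t count;        /* needs 8-byte alignment */
    uint16_t pos;          /* 2 bytes, then 6 bytes padding */
    uint64_t block[4];
  };                       /* typically 56 bytes */

  /* Grouping the short values leaves a single tail gap: */
  struct grouped
  {
    uint64_t count;
    uint64_t block[4];
    uint16_t pad_length;
    uint16_t pos;          /* 4 bytes tail padding */
  };                       /* typically 48 bytes */

The wider type presumably also keeps very long messages from wrapping a
32-bit block counter.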
<ifelse(ELF_STYLE,yes,
<.size C_NAME($1), . - C_NAME($1)>,<>)>)
-dnl Argument to ALIGN is always logarithmic
-dnl FIXME: the << operator is not supported by Solaris m4,
-dnl and ** is not supported by OpenBSD m4.
-dnl We should switch to non-logarithmic ALIGN instead.
+define(<m4_log2>, <m4_log2_internal($1,1,0)>)
+define(<m4_log2_internal>,
+<ifelse($3, 10, <not-a-power-of-two>,
+$1, $2, $3,
+<m4_log2_internal($1, eval(2*$2), eval(1 + $3))>)>)
+
+dnl Argument to ALIGN is always in bytes, and converted to a
+dnl logarithmic .align if necessary.
-dnl Need changequote to be able to use the << operator.
define(<ALIGN>,
-<changequote([,])dnl
-.align ifelse(ALIGN_LOG,yes,$1,eval(1 << $1))dnl >> balance
-changequote(<,>)dnl
+<.align ifelse(ALIGN_LOG,yes,<m4_log2($1)>,$1)
>)
dnl Struct defining macros
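
For readers less fluent in m4: m4_log2_internal doubles its second
argument while the third counts the steps, and gives up once the counter
reaches 10, so 512 is the largest power of two it accepts. The same
computation as a C sketch, illustrative only (the function name just
mirrors the macro):

  #include <stdio.h>

  /* C equivalent of the m4_log2 recursion: p doubles while n counts
     the steps; after ten steps the search fails. */
  static int
  m4_log2(unsigned x)
  {
    unsigned p = 1;
    for (int n = 0; n < 10; n++, p *= 2)
      if (x == p)
        return n;
    return -1;                     /* not-a-power-of-two */
  }

  int
  main(void)
  {
    printf("%d\n", m4_log2(16));   /* 4 */
    printf("%d\n", m4_log2(32));   /* 5 */
    printf("%d\n", m4_log2(24));   /* -1, not a power of two */
    return 0;
  }

With ALIGN_LOG set, ALIGN(16) therefore expands to .align 4 and
ALIGN(32) to .align 5; without it, the byte count is emitted unchanged.
The x86 and x86_64 hunks below simply restate each alignment in bytes.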
-C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographic library
C
C Copyright (C) 2001, 2002, 2005 Rafael R. Sevilla, Niels Möller
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_aes_decrypt)
C save all registers that need to be saved
pushl %ebx C 20(%esp)
addl $16,KEY C point to next key
movl KEY,FRAME_KEY
- ALIGN(4)
+ ALIGN(16)
.Lround_loop:
AES_ROUND(T, SA,SD,SC,SB, TMP, KEY)
movl TMP, TA
-C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographic library
C
C Copyright (C) 2001, 2002, 2005 Rafael R. Sevilla, Niels Möller
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_aes_encrypt)
C save all registers that need to be saved
pushl %ebx C 20(%esp)
addl $16,KEY C point to next key
movl KEY,FRAME_KEY
- ALIGN(4)
+ ALIGN(16)
.Lround_loop:
AES_ROUND(T, SA,SB,SC,SD, TMP, KEY)
movl TMP, TA
C unsigned length, uint8_t *dst,
C const uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(nettle_arcfour_crypt)
C save all registers that need to be saved
pushl %ebx C 12(%esp)
sarl $1, %edx
jc .Lloop_odd
- ALIGN(4)
+ ALIGN(16)
.Lloop:
movb (%ebp, %eax), %cl C si.
addb %cl, %bl
-C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographic library
C
C Copyright (C) 2010, Niels Möller
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_camellia_crypt)
C save all registers that need to be saved
pushl %ebx C 32(%esp)
C _nettle_md5_compress(uint32_t *state, uint8_t *data)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_md5_compress)
C save all registers that need to be saved
C Loop-mixed to 520 cycles (for the complete function call) on
C AMD K7.
-ALIGN(5)
+ALIGN(32)
mov 88(%esp), T2
mov OFFSET(2)(T2), %ecx
mov OFFSET(0)(T2), %eax
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_aes_decrypt)
W64_ENTRY(5, 0)
test PARAM_LENGTH, PARAM_LENGTH
subl $1, COUNT
add $16,KEY C point to next key
- ALIGN(4)
+ ALIGN(16)
.Lround_loop:
AES_ROUND(TABLE, SA,SD,SC,SB, TA, TMP)
AES_ROUND(TABLE, SB,SA,SD,SC, TB, TMP)
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_aes_encrypt)
W64_ENTRY(5, 0)
test PARAM_LENGTH, PARAM_LENGTH
subl $1, COUNT
add $16,KEY C point to next key
- ALIGN(4)
+ ALIGN(16)
.Lround_loop:
AES_ROUND(TABLE, SA,SB,SC,SD, TA, TMP)
AES_ROUND(TABLE, SB,SC,SD,SA, TB, TMP)
-C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographic library
C
C Copyright (C) 2010, Niels Möller
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_camellia_crypt)
W64_ENTRY(5, 0)
C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(nettle_ecc_192_modp)
W64_ENTRY(2, 0)
mov 16(RP), T2
-C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographic library
C
C Copyright (C) 2010, Niels Möller
C memxor(uint8_t *dst, const uint8_t *src, size_t n)
C %rdi %rsi %rdx
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(memxor)
W64_ENTRY(3, 0)
C memxor3(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n)
C %rdi %rsi %rdx %rcx
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(memxor3)
W64_ENTRY(4, 0)
jz .Ldone
jmp .Lshift_next
- ALIGN(4)
+ ALIGN(16)
.Lshift_loop:
mov 8(AP, N), S0
jmp .Lword_next
- ALIGN(4)
+ ALIGN(16)
.Lword_loop:
mov 8(AP, N), TMP
mov TMP, (DST, N)
jmp .Lsse2_next
- ALIGN(4)
+ ALIGN(16)
.Lsse2_loop:
movdqu (AP, N), %xmm0
movdqu (BP, N), %xmm1
C _salsa20_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_salsa20_core)
W64_ENTRY(3, 9)
shrl $1, XREG(COUNT)
- ALIGN(4)
+ ALIGN(16)
.Loop:
QROUND(X0, X1, X2, X3)
pshufd $0x93, X1, X1 C 11 00 01 10 (least sign. left)
C salsa20_crypt(struct salsa20_ctx *ctx, unsigned length,
C uint8_t *dst, const uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(nettle_salsa20_crypt)
W64_ENTRY(4, 9)
SWAP(X0, X2, M0011)
movl $10, XREG(COUNT)
- ALIGN(4)
+ ALIGN(16)
.Loop:
QROUND(X0, X1, X2, X3)
C For the row operations, we first rotate the rows, to get
C unsigned length, uint8_t *dst,
C const uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(nettle_serpent_decrypt)
C save all registers that need to be saved
W64_ENTRY(4, 13)
jmp .Lwround_start
- ALIGN(4)
+ ALIGN(16)
.Lwround_loop:
WLTI(X0,X1,X2,X3)
mov $384, CNT
jmp .Lround_start
- ALIGN(4)
+ ALIGN(16)
.Lround_loop:
LTI(x0,x1,x2,x3)
.Lround_start:
C unsigned length, uint8_t *dst,
C const uint8_t *src)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(nettle_serpent_encrypt)
C save all registers that need to be saved
W64_ENTRY(4, 13)
mov $-512, CNT
jmp .Lwround_start
- ALIGN(4)
+ ALIGN(16)
.Lwround_loop:
WLT(X0,X1,X2,X3)
.Lwround_start:
mov $-512, CNT
jmp .Lround_start
- ALIGN(4)
+ ALIGN(16)
.Lround_loop:
LT(x0,x1,x2,x3)
.Lround_start:
C _nettle_sha1_compress(uint32_t *state, uint8_t *input)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_sha1_compress)
C save all registers that need to be saved
W64_ENTRY(2, 0)
C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_sha256_compress)
W64_ENTRY(3, 0)
movl 24(STATE), SG
movl 28(STATE), SH
xor COUNT, COUNT
- ALIGN(4)
+ ALIGN(16)
.Loop1:
NOEXPN(0) ROUND(SA,SB,SC,SD,SE,SF,SG,SH,0)
C sha3_permute(struct sha3_state *ctx)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(nettle_sha3_permute)
W64_ENTRY(1, 16)
push %rbp
pxor A2122, C12
pxor A2324, C34
- ALIGN(4)
+ ALIGN(16)
.Loop:
C The theta step. Combine parity bits, then xor to state.
C D0 = C4 ^ (C1 <<< 1)
EPILOGUE(nettle_sha3_permute)
-ALIGN(4)
+ALIGN(16)
.rc: C In reverse order
.quad 0x8000000080008008
.quad 0x0000000080000001
C _nettle_sha512_compress(uint64_t *state, const uint8_t *input, const uint64_t *k)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_sha512_compress)
W64_ENTRY(3, 0)
mov 48(STATE), SG
mov 56(STATE), SH
xor COUNT, COUNT
- ALIGN(4)
+ ALIGN(16)
.Loop1:
NOEXPN(0) ROUND(SA,SB,SC,SD,SE,SF,SG,SH,0)
C umac_nh_n(uint64_t *out, unsigned n, const uint32_t *key,
C unsigned length, const uint8_t *msg)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_umac_nh_n)
W64_ENTRY(5, 14)
pxor XY0, XY0
C umac_nh(const uint32_t *key, unsigned length, const uint8_t *msg)
.text
- ALIGN(4)
+ ALIGN(16)
PROLOGUE(_nettle_umac_nh)
W64_ENTRY(3, 7)
pxor XY, XY