#endif
void
-_camellia_crypt(const struct camellia_ctx *ctx,
+_camellia_crypt(unsigned rounds,
+ const uint64_t *keys,
const struct camellia_table *T,
size_t length, uint8_t *dst,
const uint8_t *src)
i1 = READ_UINT64(src + 8);
/* pre whitening but absorb kw2 */
- i0 ^= ctx->keys[0];
+ i0 ^= keys[0];
/* main iteration */
- CAMELLIA_ROUNDSM(T, i0,ctx->keys[1], i1);
- CAMELLIA_ROUNDSM(T, i1,ctx->keys[2], i0);
- CAMELLIA_ROUNDSM(T, i0,ctx->keys[3], i1);
- CAMELLIA_ROUNDSM(T, i1,ctx->keys[4], i0);
- CAMELLIA_ROUNDSM(T, i0,ctx->keys[5], i1);
- CAMELLIA_ROUNDSM(T, i1,ctx->keys[6], i0);
+ CAMELLIA_ROUNDSM(T, i0, keys[1], i1);
+ CAMELLIA_ROUNDSM(T, i1, keys[2], i0);
+ CAMELLIA_ROUNDSM(T, i0, keys[3], i1);
+ CAMELLIA_ROUNDSM(T, i1, keys[4], i0);
+ CAMELLIA_ROUNDSM(T, i0, keys[5], i1);
+ CAMELLIA_ROUNDSM(T, i1, keys[6], i0);
- for (i = 0; i < ctx->nkeys - 8; i+= 8)
+ for (i = 0; i < rounds - 8; i+= 8)
{
- CAMELLIA_FL(i0, ctx->keys[i+7]);
- CAMELLIA_FLINV(i1, ctx->keys[i+8]);
+ CAMELLIA_FL(i0, keys[i+7]);
+ CAMELLIA_FLINV(i1, keys[i+8]);
- CAMELLIA_ROUNDSM(T, i0,ctx->keys[i+9], i1);
- CAMELLIA_ROUNDSM(T, i1,ctx->keys[i+10], i0);
- CAMELLIA_ROUNDSM(T, i0,ctx->keys[i+11], i1);
- CAMELLIA_ROUNDSM(T, i1,ctx->keys[i+12], i0);
- CAMELLIA_ROUNDSM(T, i0,ctx->keys[i+13], i1);
- CAMELLIA_ROUNDSM(T, i1,ctx->keys[i+14], i0);
+ CAMELLIA_ROUNDSM(T, i0, keys[i+9], i1);
+ CAMELLIA_ROUNDSM(T, i1, keys[i+10], i0);
+ CAMELLIA_ROUNDSM(T, i0, keys[i+11], i1);
+ CAMELLIA_ROUNDSM(T, i1, keys[i+12], i0);
+ CAMELLIA_ROUNDSM(T, i0, keys[i+13], i1);
+ CAMELLIA_ROUNDSM(T, i1, keys[i+14], i0);
}
/* post whitening but absorb kw4 */
- i1 ^= ctx->keys[i+7];
+ i1 ^= keys[i+7];
WRITE_UINT64(dst , i1);
WRITE_UINT64(dst + 8, i0);
define(<FRAME_CNT>, <16(%esp)>)
C Arguments on stack.
-define(<FRAME_CTX>, <40(%esp)>)
-define(<FRAME_TABLE>, <44(%esp)>)
-define(<FRAME_LENGTH>, <48(%esp)>)
-define(<FRAME_DST>, <52(%esp)>)
-define(<FRAME_SRC>, <56(%esp)>)
+define(<FRAME_ROUNDS>, <40(%esp)>)
+define(<FRAME_KEYS>, <44(%esp)>)
+define(<FRAME_TABLE>, <48(%esp)>)
+define(<FRAME_LENGTH>, <52(%esp)>)
+define(<FRAME_DST>, <56(%esp)>)
+define(<FRAME_SRC>, <60(%esp)>)
define(<SP1110>, <(T,$1,4)>)
define(<SP0222>, <1024(T,$1,4)>)
.file "camellia-encrypt-internal.asm"
- C _camellia_crypt(struct camellia_context *ctx,
+ C _camellia_crypt(unsigned rounds, const uint64_t *keys,
C const struct camellia_table *T,
C size_t length, uint8_t *dst,
C const uint8_t *src)
movl 12(TMP), L1
bswap L1
addl $16, FRAME_SRC
- movl FRAME_CTX, KEY
- movl (KEY), TMP
+ movl FRAME_KEYS, KEY
+ movl FRAME_ROUNDS, TMP
subl $8, TMP
movl TMP, FRAME_CNT
- C Whitening using first subkey
- addl $ALIGNOF_UINT64_T + 8, KEY
- xorl -8(KEY), L0
- xorl -4(KEY), H0
+ xorl (KEY), L0
+ xorl 4(KEY), H0
+ addl $8, KEY
movl FRAME_TABLE, T
C Register usage:
-define(<CTX>, <%rdi>)
-define(<TABLE>, <%rsi>)
-define(<LENGTH>, <%rdx>)
-define(<DST>, <%rcx>)
-define(<SRC>, <%r8>)
+define(<ROUNDS>, <%rdi>)
+define(<KEYS>, <%rsi>)
+define(<TABLE>, <%rdx>)
+define(<LENGTH>, <%rcx>)
+define(<DST>, <%r8>)
+define(<SRC>, <%r9>)
C Camellia state
define(<I0>, <%rax>)
define(<I1>, <%rbx>) C callee-save
-define(<KEY>, <%r9>)
+define(<KEY>, <%r13>) C callee-save
define(<TMP>, <%rbp>) C callee-save
define(<CNT>, <%r10>)
define(<IL>, <%r11>)
.file "camellia-encrypt-internal.asm"
- C _camellia_crypt(struct camellia_context *ctx,
+ C _camellia_crypt(unsigned rounds, const uint64_t *keys,
C const struct camellia_table *T,
C size_t length, uint8_t *dst,
C const uint8_t *src)
push %rbx
push %rbp
push %r12
-
+ push %r13
+ sub $8, ROUNDS
.Lblock_loop:
C Load data, note that we'll happily do unaligned loads
mov (SRC), I0
mov 8(SRC), I1
bswap I1
add $16, SRC
- mov CTX, KEY
- movl (KEY), XREG(CNT)
- sub $8, CNT
+ mov XREG(ROUNDS), XREG(CNT)
+ mov KEYS, KEY
C Whitening using first subkey
- xor 8(KEY), I0
- add $16, KEY
+ xor (KEY), I0
+ add $8, KEY
ROUND(I0, I1, 0)
ROUND(I1, I0, 8)
ja .Lblock_loop
+ pop %r13
pop %r12
pop %rbp
pop %rbx