# include "config.h"
#endif
+#include <assert.h>
#include <limits.h>
#include "memxor.h"
words, not bytes. Assumes we can read complete words at the start
and end of the src operand. */
static void
-memxor_different_alignment (word_t *dst, const char *src, size_t n)
+memxor_different_alignment (word_t *dst, const unsigned char *src, size_t n)
{
int shl, shr;
const word_t *src_word;
unsigned offset = ALIGN_OFFSET (src);
word_t s0, s1;
+ const unsigned char *part;
+ unsigned i;
+ assert (n > 0);
shl = CHAR_BIT * offset;
shr = CHAR_BIT * (sizeof(word_t) - offset);
src_word = (const word_t *) ((uintptr_t) src & -sizeof(word_t));
+ /* Read top offset bytes, in native byte order. */
+ part = src + n*sizeof(word_t) - offset;
+#if WORDS_BIGENDIAN
+ for (s0 = part[0], i = 1; i < offset; i++)
+ s0 = (s0 << CHAR_BIT) | part[i];
+ s0 <<= shr; /* FIXME: Eliminate this shift? */
+#else /* !WORDS_BIGENDIAN */
+ for (i = offset, s0 = part[--i]; i > 0 ; )
+ s0 = (s0 << CHAR_BIT) | part[--i];
+#endif /* !WORDS_BIGENDIAN */
+
+ /* Do n-1 regular iterations */
if (n & 1)
+ s1 = s0;
+ else
{
n--;
s1 = src_word[n];
- s0 = src_word[n+1]; /* FIXME: Overread */
dst[n] ^= MERGE (s1, shl, s0, shr);
}
- else
- s1 = src_word[n]; /* FIXME: Overread */
- while (n > 0)
+ assert (n & 1);
+ while (n > 2)
{
n -= 2;
s0 = src_word[n+1];
s1 = src_word[n]; /* FIXME: Overread on last iteration */
dst[n] ^= MERGE(s1, shl, s0, shr);
}
+ assert (n == 1);
+ /* Read low wordsize - offset bytes */
+#if WORDS_BIGENDIAN
+ for (s0 = src[0], i = 1; i < sizeof(word_t) - offset; i++)
+ s0 = (s0 << CHAR_BIT) | src[i];
+#else /* !WORDS_BIGENDIAN */
+ for (i = sizeof(word_t) - offset, s0 = src[--i]; i > 0 ; )
+ s0 = (s0 << CHAR_BIT) | src[--i];
+ s0 <<= shl; /* FIXME: eliminate shift? */
+#endif /* !WORDS_BIGENDIAN */
+ dst[0] ^= MERGE(s0, shl, s1, shr);
}
/* Performance, Intel SU1400 (x86_64): 0.25 cycles/byte aligned, 0.45
void *
memxor(void *dst_in, const void *src_in, size_t n)
{
- char *dst = dst_in;
- const char *src = src_in;
+ unsigned char *dst = dst_in;
+ const unsigned char *src = src_in;
if (n >= WORD_T_THRESH)
{
n %= sizeof (word_t);
if (offset)
- memxor_different_alignment ((word_t *) (dst+n), (src+n), nwords);
+ memxor_different_alignment ((word_t *) (dst+n), src+n, nwords);
else
memxor_common_alignment ((word_t *) (dst+n),
(const word_t *) (src+n), nwords);