*
* 10/14: rberinde: Added SSSE3 code and test, cleaned up a bit.
*
- * 10/15: rberinde: Added multibuffer AVX2 code.
- *
* If any changes are made to this file, please run:
* test-esx -n misc/sha1.sh
*/
#endif
#include "vmware.h"
-#ifdef VMKBOOT
-#include "vm_libc.h"
-#endif
-#if defined(VMKERNEL)
-#include "vmkernel.h"
-#include "vm_libc.h"
-#endif /* VMKERNEL */
#include "sha1.h"
#include "vm_basic_asm.h"
#include "vmk_exports.h"
0x10325476,
0xC3D2E1F0 };
-/*
- * The SSSE3 implementation is only 64-bit and it is excluded from monitor,
- * tools, vmx, workstation, and boot components, as well as Windows, Mac, and
- * FreeBSD builds. Also disabled for clang builds.
- */
-#if defined(VM_X86_64) && !defined(VMCORE) && !defined(VMKBOOT) && \
- !defined(VMX86_DESKTOP) && !defined(VMX86_TOOLS) && !defined(__APPLE__) && \
- !defined(_WIN32) && !defined(__FreeBSD__) && !defined(__clang__)
-
-#include <x86sse.h>
-
-/*
- * In the kernel we disable preemption during SSE sections, so we limit each
- * section to 16 block updates (where each block is 64 bytes); this is on the
- * order of 1-2 microseconds.
- */
-#define SHA1_SSE_BLOCKS_PER_ITERATION 16
-
-void SHA1_Transform_SSSE3_ASM(uint32 *hash,
- const uint8 *input,
- uint64 numBlocks);
-
-
-/*
- *-----------------------------------------------------------------------------
- *
- * SHA1TransformSSSE3 --
- *
- *      Runs the block transform through the SSSE3 assembly routine when the
- *      processor supports it. The input is handed to the assembly code in
- *      slices of at most SHA1_SSE_BLOCKS_PER_ITERATION blocks, and every
- *      slice is bracketed by X86SSE_Prologue/X86SSE_Epilogue.
- *
- *      The routine declines, leaving 'state' untouched, when SSSE3 is not
- *      supported, on a fraction of the calls in debug builds (so that the
- *      non-SSE code is tested as well) and, in the vmkernel, when
- *      interrupts are not enabled.
- *
- * Results:
- *      TRUE if the transform was applied here with SSSE3.
- *      FALSE if the caller has to apply the transform itself.
- *
- * Side effects:
- *      Remembers the outcome of the SSSE3 support probe in a static.
- *
- *-----------------------------------------------------------------------------
- */
-
-static INLINE Bool
-SHA1TransformSSSE3(uint32 state[5],             // IN/OUT
-                   const unsigned char *buffer, // IN
-                   uint32 numBlocks)            // IN
-{
-   static int useSSE = -1;   /* -1: not probed yet, 0: unsupported, 1: usable */
-   X86SSE_SaveState save;
-   const unsigned char *cursor = buffer;
-   uint32 remaining = numBlocks;
-
-   ASSERT(useSSE == -1 || useSSE == 0 || useSSE == 1);
-
-   /* This is safe even if multiple threads race here. */
-   if (useSSE == -1) {
-      useSSE = X86SSE_IsSSSE3Supported();
-   }
-
-   if (!useSSE) {
-      return FALSE;
-   }
-
-   /*
-    * Debug builds bail out on part of the calls so that the non-SSE
-    * version keeps being tested too.
-    */
-   if (vmx86_debug) {
-      if (RDTSC() % 101 < 20) {
-         return FALSE;
-      }
-   }
-
-#ifdef VMKERNEL
-   if (!INTERRUPTS_ENABLED()) {
-      return FALSE;
-   }
-#endif
-
-   while (remaining != 0) {
-      /* Never hand more than one iteration's worth of blocks to the asm. */
-      uint32 chunk = remaining < SHA1_SSE_BLOCKS_PER_ITERATION ?
-                        remaining : SHA1_SSE_BLOCKS_PER_ITERATION;
-
-      X86SSE_Prologue(&save, FALSE /* no AVX2 */);
-      SHA1_Transform_SSSE3_ASM(state, cursor, chunk);
-      X86SSE_Epilogue(&save);
-
-      cursor += chunk * 64;   /* each block is 64 bytes */
-      remaining -= chunk;
-   }
-
-   return TRUE;
-}
-
-#else
-
-/*
- * SSSE3 stub for unsupported targets. It does no work and always returns
- * FALSE; the visible call site returns early only when this function
- * returns TRUE, so with the stub it always continues with its own loop.
- */
-static INLINE Bool
-SHA1TransformSSSE3(uint32 state[5], // IN/OUT: not touched by the stub
- const unsigned char *buffer, // IN: not read by the stub
- uint32 numBlocks) // IN: ignored by the stub
-{
- return FALSE; // the transform is never handled here
-}
-
-#endif
-
-
-/*
- * The AVX2 multi-buffer implementation is 64-bit only and requires GCC 4.7 or
- * newer. For now it is only included in the vmkernel.
- */
-#if defined(VM_X86_64) && defined(VMKERNEL)
-
-void SHA1_Transform_AVX2_X8_ASM(uint32 transposedDigests[5][8],
- const uint8 *input[8],
- uint64 numBlocks);
-
-/*
- *-----------------------------------------------------------------------------
- *
- * SHA1MultiBufferAVX2 --
- *
- *      Uses AVX2 code to compute the SHA1 of at most SHA1_MULTI_MAX_BUFFERS
- *      buffers of the same length (if possible).
- *
- *      The running digests are kept transposed (state[word][buffer]) and
- *      all lanes are advanced together by SHA1_Transform_AVX2_X8_ASM.
- *      Lanes beyond 'numBuffers' re-read data[0]; their results are
- *      discarded. Because every buffer has the same length, one padding
- *      block (0x80, zeroes, 64-bit big-endian bit length) is shared by all
- *      lanes. That padding block is hashed even when 'len' is 0, so an
- *      empty input yields a proper digest.
- *
- *      Note: len must be a multiple of 64!
- *
- *      NOTE(review): unlike the SSSE3 transform in this file, this routine
- *      does not check INTERRUPTS_ENABLED() before X86SSE_Prologue; confirm
- *      that this is intended.
- *
- * Results:
- *      TRUE if we were able to use AVX2 to compute the hashes; digests[i]
- *      then holds SHA1_HASH_LEN bytes for every i < numBuffers.
- *      FALSE (digests untouched) if AVX2 is not supported, if fewer than 3
- *      buffers were given, or on a fraction of the calls in debug builds.
- *
- * Side effects:
- *      Remembers the outcome of the AVX2 support probe in a static.
- *
- *-----------------------------------------------------------------------------
- */
-
-static INLINE Bool
-SHA1MultiBufferAVX2(uint32 numBuffers,       // IN
-                    uint32 len,              // IN
-                    const void *data[],      // IN
-                    unsigned char **digests) // OUT
-{
-   static int useAVX2 = -1;
-
-   uint32 i, j;
-   uint32 numBlocks = len / 64;
-
-   /* Computed in 64 bits: len * 8 does not fit in 32 bits for len >= 2^29. */
-   uint64 bitLen = (uint64)len * 8;
-
-   /*
-    * Both arrays are handed to SHA1_Transform_AVX2_X8_ASM, whose prototype
-    * fixes the lane count at 8, so SHA1_MULTI_MAX_BUFFERS has to match it.
-    */
-   uint32 state[5][SHA1_MULTI_MAX_BUFFERS];
-   const uint8 *ptrs[SHA1_MULTI_MAX_BUFFERS];
-
-   /* Last (padding) block: 0x80, zeroes, then the length in the tail. */
-   uint8 lastBlock[64] = {0x80};
-
-   X86SSE_SaveState save;
-
-   ASSERT(numBuffers <= SHA1_MULTI_MAX_BUFFERS);
-   ASSERT(len % 64 == 0);
-
-   /*
-    * In debug mode, don't use AVX2 some of the time to make
-    * sure the non-AVX2 version is tested as well.
-    */
-   if (vmx86_debug && RDTSC() % 101 < 20) {
-      return FALSE;
-   }
-
-   ASSERT(useAVX2 == -1 || useAVX2 == 0 || useAVX2 == 1);
-
-   /* This is safe even if multiple threads race here. */
-   if (useAVX2 == -1) {
-      useAVX2 = X86SSE_IsAVX2Supported();
-   }
-
-   if (useAVX2 == 0) {
-      return FALSE;
-   }
-
-   if (numBuffers < 3) {
-      /*
-       * This routine is about 2x slower than the regular routine; it is
-       * not worth using with less than 3 buffers.
-       */
-      return FALSE;
-   }
-
-   /* Encode the length (in bits, big endian) in the last 8 bytes. */
-   for (i = 0; i < 8; i++) {
-      lastBlock[63 - i] = (bitLen >> (i * 8)) & 0xFF;
-   }
-
-   for (i = 0; i < 5; i++) {
-      for (j = 0; j < SHA1_MULTI_MAX_BUFFERS; j++) {
-         state[i][j] = sha1InitVec[i];
-      }
-   }
-
-   for (i = 0; i < SHA1_MULTI_MAX_BUFFERS; i++) {
-      if (i < numBuffers) {
-         ptrs[i] = data[i];
-      } else {
-         /*
-          * We don't care about these buffers but the pointers
-          * need to be valid.
-          */
-         ptrs[i] = data[0];
-      }
-   }
-
-   /*
-    * A do/while so that the padding block is hashed even when there are
-    * no data blocks at all (len == 0).
-    */
-   do {
-      uint32 blocksInIter = MIN(numBlocks, SHA1_SSE_BLOCKS_PER_ITERATION);
-
-      X86SSE_Prologue(&save, TRUE /* AVX2 */);
-
-      if (blocksInIter > 0) {
-         SHA1_Transform_AVX2_X8_ASM(state, ptrs, blocksInIter);
-
-         numBlocks -= blocksInIter;
-         for (i = 0; i < SHA1_MULTI_MAX_BUFFERS; i++) {
-            ptrs[i] += 64 * blocksInIter;
-         }
-      }
-
-      if (numBlocks == 0) {
-         /* Do the last block; it is the same for every lane. */
-         for (i = 0; i < SHA1_MULTI_MAX_BUFFERS; i++) {
-            ptrs[i] = lastBlock;
-         }
-         SHA1_Transform_AVX2_X8_ASM(state, ptrs, 1);
-      }
-
-      X86SSE_Epilogue(&save);
-   } while (numBlocks > 0);
-
-   /* Un-transpose: byte j of digest i comes from word j / 4, big endian. */
-   for (i = 0; i < numBuffers; i++) {
-      for (j = 0; j < SHA1_HASH_LEN; j++) {
-         digests[i][j] = (state[j / 4][i] >> ((3 - j % 4) * 8)) & 0xFF;
-      }
-   }
-
-   return TRUE;
-}
-
-#else
-
-/*
- * AVX2 stub for unsupported targets. It only checks the argument
- * assertions and then always returns FALSE without writing any digest.
- */
-static INLINE Bool
-SHA1MultiBufferAVX2(uint32 numBuffers, // IN: only checked by the assertion
- uint32 len, // IN: only checked by the assertion
- const void *data[], // IN: not read by the stub
- unsigned char **digests) // OUT: never written by the stub
-{
- ASSERT(numBuffers <= SHA1_MULTI_MAX_BUFFERS);
- ASSERT(len % 64 == 0);
- return FALSE; // the hashes are never computed here
-}
-
-#endif
-
-
/* If the endianness is not defined (it is done in string.h of glibc 2.1.1), we
default to LE --hpreg */
#ifndef LITTLE_ENDIAN
{
uint32 i;
- if (SHA1TransformSSSE3(state, buffer, numBlocks)) {
- return;
- }
-
for (i = 0; i < numBlocks; i++) {
unsigned char workspace[64];
}
context->count[0] = context->count[1] = 0;
}
-VMK_KERNEL_EXPORT(SHA1Init);
/*
ASSERT(len + curOfs < 64);
memcpy(&context->buffer[curOfs], data, len);
}
-VMK_KERNEL_EXPORT(SHA1Update);
/*
memset(context->count, 0, 8);
memset(&finalcount, 0, 8);
}
-VMK_KERNEL_EXPORT(SHA1Final);
-
-
-/*
- *-----------------------------------------------------------------------------
- *
- * SHA1RawBufferHash --
- *
- *      Hashes a "raw" buffer: 'result' is seeded from sha1InitVec and then
- *      passed to SHA1Transform together with the data, and that is all.
- *      No preprocessing is done (the 0x80 byte, the 0x00 padding and the
- *      length encoding that a proper message digest requires are NOT
- *      added).
- *
- *      Useful if the buffer already contains the preprocessed data, or if
- *      we are computing and comparing hashes for fixed-size blocks.
- *
- *      The buffer size must be a multiple of 64 bytes.
- *
- *      !! WARNING !! Do not use unless you know what you are doing! This
- *      will NOT compute the "usual" digest of the given buffer.
- *
- * Results:
- *      'result' contains the 160-bit SHA-1 value.
- *
- * Side effects:
- *      None.
- *
- *-----------------------------------------------------------------------------
- */
-
-void
-SHA1RawBufferHash(const void *data,  // IN
-                  uint32 size,       // IN
-                  uint32 result[5])  // OUT
-{
-   const uint8 *bytes = (const uint8 *) data;
-   uint32 word = 5;
-
-   ASSERT(size % 64 == 0);
-
-   /* Seed all five words of the output from the initial vector. */
-   while (word-- > 0) {
-      result[word] = sha1InitVec[word];
-   }
-
-   /* size / 64 is the number of 64-byte blocks in the buffer. */
-   SHA1Transform(result, bytes, size / 64);
-}
-VMK_KERNEL_EXPORT(SHA1RawBufferHash);
-
-
-/*
- *-----------------------------------------------------------------------------
- *
- * SHA1RawTransformBlocks --
- *
- * !! WARNING !! Do not use unless you know what you are doing! This
- * will NOT compute the "usual" digest of the given buffer: the function
- * does _not_ do padding or handle variable buffer lengths. The buffer
- * size must be a multiple of 64 and the caller has to take care of the
- * initial state.
- *
- * Finds the SHA-1 of a "raw" buffer, without doing any preprocessing (does
- * NOT add the 0x80 byte, 0x00 padding, and length encoding required for
- * the result to be a proper message digest). The arguments are passed
- * unchanged to SHA1Transform after asserting that 'buffer' is not NULL.
- *
- * 'numBlocks' presumably counts 64-byte blocks, as SHA1RawBufferHash
- * passes size / 64 to the same routine -- confirm against SHA1Transform.
- *
- * This function is the alternative to SHA1RawBufferHash for callers that
- * need to control the initial state. Used by the native random driver.
- *
- * Results:
- * 'state' contains the 160-bit SHA-1 value.
- *
- * Side effects:
- * None.
- *
- *-----------------------------------------------------------------------------
- */
-
-void
-SHA1RawTransformBlocks(uint32 state[5], // IN/OUT
- const unsigned char *buffer, // IN
- uint32 numBlocks) // IN
-{
- ASSERT(buffer != NULL); // checked even when numBlocks is 0
- SHA1Transform(state, buffer, numBlocks); // 'state' is not re-initialized here
-}
-VMK_KERNEL_EXPORT(SHA1RawTransformBlocks);
-
-
-/*
- *-----------------------------------------------------------------------------
- *
- * SHA1RawInit --
- *
- *      Sets the initial state for raw SHA1 transformations by copying the
- *      five words of sha1InitVec into 'state'.
- *
- *      You probably want to use SHA1Init.
- *
- * Results:
- *      'state' is filled with the initial SHA1 values.
- *
- * Side effects:
- *      None.
- *
- *-----------------------------------------------------------------------------
- */
-
-void
-SHA1RawInit(uint32 state[5]) // OUT
-{
-   uint32 word;
-
-   for (word = 0; word < 5; word++) {
-      state[word] = sha1InitVec[word];
-   }
-}
-VMK_KERNEL_EXPORT(SHA1RawInit);
-
-
-/*
- *-----------------------------------------------------------------------------
- *
- * SHA1MultiBuffer --
- *
- *      Computes the digests for multiple buffers of the same length.
- *      Supports at most SHA1_MULTI_MAX_BUFFERS buffers.
- *
- *      When the length is a multiple of 64 the work is first offered to
- *      SHA1MultiBufferAVX2; if that routine returns FALSE, or the length is
- *      not a multiple of 64, each buffer is hashed on its own with
- *      SHA1Init/SHA1Update/SHA1Final.
- *
- *      On recent processors (with AVX2) this function yields significantly
- *      more aggregate throughput compared to hashing each buffer
- *      separately. Maximum throughput is obtained for
- *      SHA1_MULTI_MAX_BUFFERS.
- *
- * Results:
- *      The computed digests in digests[i][j], with 0 <= i < numBuffers and
- *      0 <= j < SHA1_HASH_LEN.
- *
- * Side effects:
- *      None.
- *
- *-----------------------------------------------------------------------------
- */
-
-void
-SHA1MultiBuffer(uint32 numBuffers,        // IN
-                uint32 len,               // IN
-                const void *data[],       // IN
-                unsigned char *digests[]) // OUT
-{
-   uint32 idx;
-
-   ASSERT(numBuffers > 0);
-   ASSERT(numBuffers <= SHA1_MULTI_MAX_BUFFERS);
-
-   /* The multi-buffer routine is only tried for whole 64-byte blocks. */
-   if (len % 64 == 0) {
-      if (SHA1MultiBufferAVX2(numBuffers, len, data, digests)) {
-         return;
-      }
-   }
-
-   /* Use the regular routine, one buffer at a time. */
-   idx = 0;
-   while (idx < numBuffers) {
-      SHA1_CTX ctx;
-
-      SHA1Init(&ctx);
-      SHA1Update(&ctx, data[idx], len);
-      SHA1Final(digests[idx], &ctx);
-      idx++;
-   }
-}
-VMK_KERNEL_EXPORT(SHA1MultiBuffer);