/*
 * crypto/include/internal/md32_common.h
 */
1 /*
2 * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /*-
11 * This is a generic 32 bit "collector" for message digest algorithms.
12 * Whenever needed it collects input character stream into chunks of
13 * 32 bit values and invokes a block function that performs actual hash
14 * calculations.
15 *
16 * Porting guide.
17 *
18 * Obligatory macros:
19 *
20 * DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
21 * this macro defines byte order of input stream.
22 * HASH_CBLOCK
23 * size of a unit chunk HASH_BLOCK operates on.
24 * HASH_LONG
 *      has to be at least 32 bit wide.
26 * HASH_CTX
27 * context structure that at least contains following
28 * members:
29 * typedef struct {
30 * ...
31 * HASH_LONG Nl,Nh;
32 * either {
33 * HASH_LONG data[HASH_LBLOCK];
34 * unsigned char data[HASH_CBLOCK];
35 * };
36 * unsigned int num;
37 * ...
38 * } HASH_CTX;
39 * data[] vector is expected to be zeroed upon first call to
40 * HASH_UPDATE.
41 * HASH_UPDATE
42 * name of "Update" function, implemented here.
43 * HASH_TRANSFORM
44 * name of "Transform" function, implemented here.
45 * HASH_FINAL
46 * name of "Final" function, implemented here.
47 * HASH_BLOCK_DATA_ORDER
48 * name of "block" function capable of treating *unaligned* input
49 * message in original (data) byte order, implemented externally.
50 * HASH_MAKE_STRING
 *      macro converting context variables to an ASCII hash string.
52 *
53 * MD5 example:
54 *
55 * #define DATA_ORDER_IS_LITTLE_ENDIAN
56 *
57 * #define HASH_LONG MD5_LONG
58 * #define HASH_CTX MD5_CTX
59 * #define HASH_CBLOCK MD5_CBLOCK
60 * #define HASH_UPDATE MD5_Update
61 * #define HASH_TRANSFORM MD5_Transform
62 * #define HASH_FINAL MD5_Final
63 * #define HASH_BLOCK_DATA_ORDER md5_block_data_order
64 *
65 * <appro@fy.chalmers.se>
66 */
67
/*
 * Sanity checks: every macro listed as "obligatory" in the porting guide
 * above must be supplied by the including file *before* this header is
 * processed; fail the build loudly otherwise.
 */
#if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
# error "DATA_ORDER must be defined!"
#endif

#ifndef HASH_CBLOCK
# error "HASH_CBLOCK must be defined!"
#endif
#ifndef HASH_LONG
# error "HASH_LONG must be defined!"
#endif
#ifndef HASH_CTX
# error "HASH_CTX must be defined!"
#endif

#ifndef HASH_UPDATE
# error "HASH_UPDATE must be defined!"
#endif
#ifndef HASH_TRANSFORM
# error "HASH_TRANSFORM must be defined!"
#endif
#ifndef HASH_FINAL
# error "HASH_FINAL must be defined!"
#endif

#ifndef HASH_BLOCK_DATA_ORDER
# error "HASH_BLOCK_DATA_ORDER must be defined!"
#endif
/*
 * Engage compiler specific rotate intrinsic function if available.
 * ROTATE(a,n) rotates the low 32 bits of 'a' left by 'n' positions;
 * the inline-assembler variants below require 'n' to be a compile-time
 * constant (see the "I" constraints).
 */
#undef ROTATE
#ifndef PEDANTIC
# if defined(_MSC_VER)
#  define ROTATE(a,n)   _lrotl(a,n)
# elif defined(__ICC)
#  define ROTATE(a,n)   _rotl(a,n)
# elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
  /*
   * Some GNU C inline assembler templates. Note that these are
   * rotates by *constant* number of bits! But that's exactly
   * what we need here...
   * <appro@fy.chalmers.se>
   */
#  if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#   define ROTATE(a,n)  ({ register unsigned int ret;   \
                                asm (                   \
                                "roll %1,%0"            \
                                : "=r"(ret)             \
                                : "I"(n), "0"((unsigned int)(a)) \
                                : "cc");                \
                           ret;                         \
                        })
#  elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
        defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
#   define ROTATE(a,n)  ({ register unsigned int ret;   \
                                asm (                   \
                                "rlwinm %0,%1,%2,0,31"  \
                                : "=r"(ret)             \
                                : "r"(a), "I"(n));      \
                           ret;                         \
                        })
#  elif defined(__s390x__)
#   define ROTATE(a,n) ({ register unsigned int ret;    \
                          asm ("rll %0,%1,%2"           \
                               : "=r"(ret)              \
                               : "r"(a), "I"(n));       \
                          ret;                          \
                        })
#  endif
# endif
#endif                          /* PEDANTIC */

#ifndef ROTATE
/*
 * Portable fallback.  NOTE(review): looks like this assumes 0 < n < 32 —
 * n == 0 would produce a shift by 32, which is undefined behaviour;
 * callers in the digest implementations appear to use constant n in that
 * range, but confirm before reusing elsewhere.
 */
# define ROTATE(a,n)    (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#endif
144
/*
 * HOST_c2l(c,l): read four bytes through the (unsigned char *) cursor 'c'
 * in the digest's input byte order (selected by DATA_ORDER_IS_*), deposit
 * the resulting 32-bit value in 'l' and advance 'c' by 4.
 * HOST_l2c(l,c): the inverse — emit 'l' as four bytes through 'c' and
 * advance it.  The generic byte-wise definitions at the end of each branch
 * are overridden by faster platform-specific versions where available.
 */
#if defined(DATA_ORDER_IS_BIG_ENDIAN)

# ifndef PEDANTIC
#  if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
       (defined(__x86_64) || defined(__x86_64__))
#    if !defined(B_ENDIAN)
/*
 * This gives ~30-40% performance improvement in SHA-256 compiled
 * with gcc [on P4]. Well, first macro to be frank. We can pull
 * this trick on x86* platforms only, because these CPUs can fetch
 * unaligned data without raising an exception.
 */
#     define HOST_c2l(c,l)  ({ unsigned int r=*((const unsigned int *)(c)); \
                               asm ("bswapl %0":"=r"(r):"0"(r));    \
                               (c)+=4; (l)=r; })
#     define HOST_l2c(l,c)  ({ unsigned int r=(l);                  \
                               asm ("bswapl %0":"=r"(r):"0"(r));    \
                               *((unsigned int *)(c))=r; (c)+=4; r; })
#    endif
#   elif defined(__aarch64__)
#    if defined(__BYTE_ORDER__)
#     if defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
       /* little-endian AArch64: byte-reverse with "rev" */
#      define HOST_c2l(c,l) ({ unsigned int r;              \
                               asm ("rev %w0,%w1"           \
                                    :"=r"(r)                \
                                    :"r"(*((const unsigned int *)(c))));\
                               (c)+=4; (l)=r; })
#      define HOST_l2c(l,c) ({ unsigned int r;              \
                               asm ("rev %w0,%w1"           \
                                    :"=r"(r)                \
                                    :"r"((unsigned int)(l)));\
                               *((unsigned int *)(c))=r; (c)+=4; r; })
#     elif defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
       /* big-endian AArch64: host order already matches, plain load/store */
#      define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
#      define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
#     endif
#    endif
#   endif
#  endif
#  if defined(__s390__) || defined(__s390x__)
   /* s390 is big-endian and tolerates unaligned word access */
#   define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
#   define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
#  endif
# endif

# ifndef HOST_c2l
#  define HOST_c2l(c,l)  (l =(((unsigned long)(*((c)++)))<<24),  \
                          l|=(((unsigned long)(*((c)++)))<<16),  \
                          l|=(((unsigned long)(*((c)++)))<< 8),  \
                          l|=(((unsigned long)(*((c)++)))     ))
# endif
# ifndef HOST_l2c
#  define HOST_l2c(l,c)  (*((c)++)=(unsigned char)(((l)>>24)&0xff),  \
                          *((c)++)=(unsigned char)(((l)>>16)&0xff),  \
                          *((c)++)=(unsigned char)(((l)>> 8)&0xff),  \
                          *((c)++)=(unsigned char)(((l)    )&0xff),  \
                          l)
# endif

#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)

# ifndef PEDANTIC
#  if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__s390x__)
     /* "lrv"/"strv" are byte-reversing load/store on s390x */
#    define HOST_c2l(c,l) ({ asm ("lrv %0,%1"                    \
                                  :"=d"(l) :"m"(*(const unsigned int *)(c)));\
                             (c)+=4; (l); })
#    define HOST_l2c(l,c) ({ asm ("strv %1,%0"                   \
                                  :"=m"(*(unsigned int *)(c)) :"d"(l));\
                             (c)+=4; (l); })
#   endif
#  endif
#  if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#   ifndef B_ENDIAN
     /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
#    define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l)
#    define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l)
#   endif
#  endif
# endif

# ifndef HOST_c2l
#  define HOST_c2l(c,l)  (l =(((unsigned long)(*((c)++)))     ),  \
                          l|=(((unsigned long)(*((c)++)))<< 8),  \
                          l|=(((unsigned long)(*((c)++)))<<16),  \
                          l|=(((unsigned long)(*((c)++)))<<24))
# endif
# ifndef HOST_l2c
#  define HOST_l2c(l,c)  (*((c)++)=(unsigned char)(((l)    )&0xff),  \
                          *((c)++)=(unsigned char)(((l)>> 8)&0xff),  \
                          *((c)++)=(unsigned char)(((l)>>16)&0xff),  \
                          *((c)++)=(unsigned char)(((l)>>24)&0xff),  \
                          l)
# endif

#endif
242
243 /*
244 * Time for some action:-)
245 */
246
247 int HASH_UPDATE(HASH_CTX *c, const void *data_, size_t len)
248 {
249 const unsigned char *data = data_;
250 unsigned char *p;
251 HASH_LONG l;
252 size_t n;
253
254 if (len == 0)
255 return 1;
256
257 l = (c->Nl + (((HASH_LONG) len) << 3)) & 0xffffffffUL;
258 /*
259 * 95-05-24 eay Fixed a bug with the overflow handling, thanks to Wei Dai
260 * <weidai@eskimo.com> for pointing it out.
261 */
262 if (l < c->Nl) /* overflow */
263 c->Nh++;
264 c->Nh += (HASH_LONG) (len >> 29); /* might cause compiler warning on
265 * 16-bit */
266 c->Nl = l;
267
268 n = c->num;
269 if (n != 0) {
270 p = (unsigned char *)c->data;
271
272 if (len >= HASH_CBLOCK || len + n >= HASH_CBLOCK) {
273 memcpy(p + n, data, HASH_CBLOCK - n);
274 HASH_BLOCK_DATA_ORDER(c, p, 1);
275 n = HASH_CBLOCK - n;
276 data += n;
277 len -= n;
278 c->num = 0;
279 memset(p, 0, HASH_CBLOCK); /* keep it zeroed */
280 } else {
281 memcpy(p + n, data, len);
282 c->num += (unsigned int)len;
283 return 1;
284 }
285 }
286
287 n = len / HASH_CBLOCK;
288 if (n > 0) {
289 HASH_BLOCK_DATA_ORDER(c, data, n);
290 n *= HASH_CBLOCK;
291 data += n;
292 len -= n;
293 }
294
295 if (len != 0) {
296 p = (unsigned char *)c->data;
297 c->num = (unsigned int)len;
298 memcpy(p, data, len);
299 }
300 return 1;
301 }
302
/*
 * Feed exactly one HASH_CBLOCK-byte block straight to the compression
 * function, bypassing HASH_UPDATE's buffering and bit-count accounting.
 */
void HASH_TRANSFORM(HASH_CTX *c, const unsigned char *data)
{
    HASH_BLOCK_DATA_ORDER(c, data, 1);
}
307
308 int HASH_FINAL(unsigned char *md, HASH_CTX *c)
309 {
310 unsigned char *p = (unsigned char *)c->data;
311 size_t n = c->num;
312
313 p[n] = 0x80; /* there is always room for one */
314 n++;
315
316 if (n > (HASH_CBLOCK - 8)) {
317 memset(p + n, 0, HASH_CBLOCK - n);
318 n = 0;
319 HASH_BLOCK_DATA_ORDER(c, p, 1);
320 }
321 memset(p + n, 0, HASH_CBLOCK - 8 - n);
322
323 p += HASH_CBLOCK - 8;
324 #if defined(DATA_ORDER_IS_BIG_ENDIAN)
325 (void)HOST_l2c(c->Nh, p);
326 (void)HOST_l2c(c->Nl, p);
327 #elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
328 (void)HOST_l2c(c->Nl, p);
329 (void)HOST_l2c(c->Nh, p);
330 #endif
331 p -= HASH_CBLOCK;
332 HASH_BLOCK_DATA_ORDER(c, p, 1);
333 c->num = 0;
334 memset(p, 0, HASH_CBLOCK);
335
336 #ifndef HASH_MAKE_STRING
337 # error "HASH_MAKE_STRING must be defined!"
338 #else
339 HASH_MAKE_STRING(c, md);
340 #endif
341
342 return 1;
343 }
344
/*
 * MD32_REG_T is the integer type digest implementations use for their
 * working variables; the includer may pre-define it to override the
 * per-architecture choice below.
 */
#ifndef MD32_REG_T
# if defined(__alpha) || defined(__sparcv9) || defined(__mips)
#  define MD32_REG_T long
/*
 * This comment was originally written for MD5, which is why it
 * discusses A-D. But it basically applies to all 32-bit digests,
 * which is why it was moved to common header file.
 *
 * In case you wonder why A-D are declared as long and not
 * as MD5_LONG. Doing so results in slight performance
 * boost on LP64 architectures. The catch is we don't
 * really care if 32 MSBs of a 64-bit register get polluted
 * with eventual overflows as we *save* only 32 LSBs in
 * *either* case. Now declaring 'em long excuses the compiler
 * from keeping 32 MSBs zeroed resulting in 13% performance
 * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
 * Well, to be honest it should say that this *prevents*
 * performance degradation.
 * <appro@fy.chalmers.se>
 */
# else
/*
 * Above is not absolute and there are LP64 compilers that
 * generate better code if MD32_REG_T is defined int. The above
 * pre-processor condition reflects the circumstances under which
 * the conclusion was made and is subject to further extension.
 * <appro@fy.chalmers.se>
 */
#  define MD32_REG_T int
# endif
#endif