add optimized SHA1 algorithm

This optimized implementation of the SHA1 algorithm is about 28%
faster than the old one (on sapphire hardware) but assumes
little-endianness.  Add it, but continue using the old implementation
on big-endian hardware.
This commit is contained in:
Doug Zongker 2009-08-17 15:38:31 -07:00
parent 818806ad59
commit a6de77de17
2 changed files with 195 additions and 24 deletions

View file

@ -13,14 +13,14 @@
** be used to endorse or promote products derived from this software
** without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR
** THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
** EVENT SHALL Google Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
** OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
** ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@ -29,6 +29,7 @@
#define _EMBEDDED_SHA_H_
#include <inttypes.h>
#include <endian.h>
#ifdef __cplusplus
extern "C" {
@ -36,16 +37,23 @@ extern "C" {
typedef struct SHA_CTX {
uint64_t count;
uint8_t buf[64];
uint32_t state[5];
#if __BYTE_ORDER == __LITTLE_ENDIAN
union {
uint8_t b[64];
uint32_t w[16];
} buf;
#else
uint8_t buf[64];
#endif
} SHA_CTX;
void SHA_init(SHA_CTX *ctx);
void SHA_update(SHA_CTX *ctx, const void* data, int len);
const uint8_t* SHA_final(SHA_CTX *ctx);
void SHA_init(SHA_CTX* ctx);
void SHA_update(SHA_CTX* ctx, const void* data, int len);
const uint8_t* SHA_final(SHA_CTX* ctx);
/* Convenience method. Returns digest parameter value. */
const uint8_t* SHA(const void *data, int len, uint8_t *digest);
const uint8_t* SHA(const void* data, int len, uint8_t* digest);
#define SHA_DIGEST_SIZE 20

View file

@ -13,20 +13,181 @@
** be used to endorse or promote products derived from this software
** without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR
** THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
** EVENT SHALL Google Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
** OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
** ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <byteswap.h>
#include <endian.h>
#include <memory.h>
#include "mincrypt/sha.h"
#if __BYTE_ORDER == __LITTLE_ENDIAN
// This version is about 28% faster than the generic version below,
// but assumes little-endianness.
static inline uint32_t ror27(uint32_t val) {
return (val >> 27) | (val << 5);
}
static inline uint32_t ror2(uint32_t val) {
return (val >> 2) | (val << 30);
}
static inline uint32_t ror31(uint32_t val) {
return (val >> 31) | (val << 1);
}
static void SHA1_Transform(SHA_CTX* ctx) {
uint32_t W[80];
register uint32_t A, B, C, D, E;
int t;
A = ctx->state[0];
B = ctx->state[1];
C = ctx->state[2];
D = ctx->state[3];
E = ctx->state[4];
#define SHA_F1(A,B,C,D,E,t) \
E += ror27(A) + \
(W[t] = bswap_32(ctx->buf.w[t])) + \
(D^(B&(C^D))) + 0x5A827999; \
B = ror2(B);
for (t = 0; t < 15; t += 5) {
SHA_F1(A,B,C,D,E,t + 0);
SHA_F1(E,A,B,C,D,t + 1);
SHA_F1(D,E,A,B,C,t + 2);
SHA_F1(C,D,E,A,B,t + 3);
SHA_F1(B,C,D,E,A,t + 4);
}
SHA_F1(A,B,C,D,E,t + 0); // 16th one, t == 15
#undef SHA_F1
#define SHA_F1(A,B,C,D,E,t) \
E += ror27(A) + \
(W[t] = ror31(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])) + \
(D^(B&(C^D))) + 0x5A827999; \
B = ror2(B);
SHA_F1(E,A,B,C,D,t + 1);
SHA_F1(D,E,A,B,C,t + 2);
SHA_F1(C,D,E,A,B,t + 3);
SHA_F1(B,C,D,E,A,t + 4);
#undef SHA_F1
#define SHA_F2(A,B,C,D,E,t) \
E += ror27(A) + \
(W[t] = ror31(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])) + \
(B^C^D) + 0x6ED9EBA1; \
B = ror2(B);
for (t = 20; t < 40; t += 5) {
SHA_F2(A,B,C,D,E,t + 0);
SHA_F2(E,A,B,C,D,t + 1);
SHA_F2(D,E,A,B,C,t + 2);
SHA_F2(C,D,E,A,B,t + 3);
SHA_F2(B,C,D,E,A,t + 4);
}
#undef SHA_F2
#define SHA_F3(A,B,C,D,E,t) \
E += ror27(A) + \
(W[t] = ror31(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])) + \
((B&C)|(D&(B|C))) + 0x8F1BBCDC; \
B = ror2(B);
for (; t < 60; t += 5) {
SHA_F3(A,B,C,D,E,t + 0);
SHA_F3(E,A,B,C,D,t + 1);
SHA_F3(D,E,A,B,C,t + 2);
SHA_F3(C,D,E,A,B,t + 3);
SHA_F3(B,C,D,E,A,t + 4);
}
#undef SHA_F3
#define SHA_F4(A,B,C,D,E,t) \
E += ror27(A) + \
(W[t] = ror31(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])) + \
(B^C^D) + 0xCA62C1D6; \
B = ror2(B);
for (; t < 80; t += 5) {
SHA_F4(A,B,C,D,E,t + 0);
SHA_F4(E,A,B,C,D,t + 1);
SHA_F4(D,E,A,B,C,t + 2);
SHA_F4(C,D,E,A,B,t + 3);
SHA_F4(B,C,D,E,A,t + 4);
}
#undef SHA_F4
ctx->state[0] += A;
ctx->state[1] += B;
ctx->state[2] += C;
ctx->state[3] += D;
ctx->state[4] += E;
}
void SHA_update(SHA_CTX* ctx, const void* data, int len) {
int i = ctx->count % sizeof(ctx->buf);
const uint8_t* p = (const uint8_t*)data;
ctx->count += len;
while (len > sizeof(ctx->buf) - i) {
memcpy(&ctx->buf.b[i], p, sizeof(ctx->buf) - i);
len -= sizeof(ctx->buf) - i;
p += sizeof(ctx->buf) - i;
SHA1_Transform(ctx);
i = 0;
}
while (len--) {
ctx->buf.b[i++] = *p++;
if (i == sizeof(ctx->buf)) {
SHA1_Transform(ctx);
i = 0;
}
}
}
const uint8_t* SHA_final(SHA_CTX* ctx) {
uint64_t cnt = ctx->count * 8;
int i;
SHA_update(ctx, (uint8_t*)"\x80", 1);
while ((ctx->count % sizeof(ctx->buf)) != (sizeof(ctx->buf) - 8)) {
SHA_update(ctx, (uint8_t*)"\0", 1);
}
for (i = 0; i < 8; ++i) {
uint8_t tmp = cnt >> ((7 - i) * 8);
SHA_update(ctx, &tmp, 1);
}
for (i = 0; i < 5; i++) {
ctx->buf.w[i] = bswap_32(ctx->state[i]);
}
return ctx->buf.b;
}
#else // __BYTE_ORDER == BIG_ENDIAN
#define rol(bits, value) (((value) << (bits)) | ((value) >> (32 - (bits))))
static void SHA1_transform(SHA_CTX *ctx) {
@ -79,15 +240,6 @@ static void SHA1_transform(SHA_CTX *ctx) {
ctx->state[4] += E;
}
void SHA_init(SHA_CTX *ctx) {
ctx->state[0] = 0x67452301;
ctx->state[1] = 0xEFCDAB89;
ctx->state[2] = 0x98BADCFE;
ctx->state[3] = 0x10325476;
ctx->state[4] = 0xC3D2E1F0;
ctx->count = 0;
}
void SHA_update(SHA_CTX *ctx, const void *data, int len) {
int i = ctx->count % sizeof(ctx->buf);
const uint8_t* p = (const uint8_t*)data;
@ -127,6 +279,17 @@ const uint8_t *SHA_final(SHA_CTX *ctx) {
return ctx->buf;
}
#endif // endianness
void SHA_init(SHA_CTX* ctx) {
ctx->state[0] = 0x67452301;
ctx->state[1] = 0xEFCDAB89;
ctx->state[2] = 0x98BADCFE;
ctx->state[3] = 0x10325476;
ctx->state[4] = 0xC3D2E1F0;
ctx->count = 0;
}
/* Convenience function */
const uint8_t* SHA(const void *data, int len, uint8_t *digest) {
const uint8_t *p;