From 0468feb28696751efcddada8de69a622afffdba8 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Fri, 20 Jun 2014 22:49:20 -0700 Subject: [PATCH] Sync to current upstream arc4random. This is actually revision 1.33, which is no longer the latest, but it's as close to head as we can currently reasonably get. I've also switched to the OpenBSD getentropy_linux.c implementation of getentropy, lightly modified to try to report an error on failure. Bug: 14499627 Change-Id: Ia7c561184b1f366c9bf66f248aa60f0d53535fcb --- libc/Android.mk | 2 +- libc/bionic/arc4random.c | 403 +++++++++--------- .../{thread_atexit.cpp => thread_private.cpp} | 20 +- libc/include/stdlib.h | 7 +- libc/private/thread_private.h | 14 +- .../lib/libc/crypt/chacha_private.h | 222 ++++++++++ 6 files changed, 450 insertions(+), 218 deletions(-) rename libc/bionic/{thread_atexit.cpp => thread_private.cpp} (84%) create mode 100644 libc/upstream-openbsd/lib/libc/crypt/chacha_private.h diff --git a/libc/Android.mk b/libc/Android.mk index 002978c4e..f61482471 100644 --- a/libc/Android.mk +++ b/libc/Android.mk @@ -217,7 +217,7 @@ libc_bionic_src_files := \ bionic/system_properties.cpp \ bionic/tdestroy.cpp \ bionic/termios.cpp \ - bionic/thread_atexit.cpp \ + bionic/thread_private.cpp \ bionic/tmpfile.cpp \ bionic/umount.cpp \ bionic/unlink.cpp \ diff --git a/libc/bionic/arc4random.c b/libc/bionic/arc4random.c index 687030b86..9bdf3417e 100644 --- a/libc/bionic/arc4random.c +++ b/libc/bionic/arc4random.c @@ -1,8 +1,9 @@ -/* $OpenBSD: arc4random.c,v 1.19 2008/06/04 00:50:23 djm Exp $ */ +/* $OpenBSD: arc4random.c,v 1.33 2014/06/13 18:58:58 deraadt Exp $ */ /* * Copyright (c) 1996, David Mazieres * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -18,214 +19,236 @@ */ /* - * Arc4 random number generator for OpenBSD. - * - * This code is derived from section 17.1 of Applied Cryptography, - * second edition, which describes a stream cipher allegedly - * compatible with RSA Labs "RC4" cipher (the actual description of - * which is a trade secret). The same algorithm is used as a stream - * cipher called "arcfour" in Tatu Ylonen's ssh package. - * - * Here the stream cipher has been modified always to include the time - * when initializing the state. That makes it impossible to - * regenerate the same random sequence twice, so this can't be used - * for encryption, but will generate good random numbers. - * - * RC4 is a registered trademark of RSA Laboratories. + * ChaCha based random number generator for OpenBSD. */ #include #include #include +#include #include #include #include #include +#include + +#if defined(__ANDROID__) +#include +#include +#include "private/libc_logging.h" #include "private/thread_private.h" -/* BIONIC-BEGIN */ -/* this lock should protect the global variables in this file */ -static pthread_mutex_t _arc4_lock = PTHREAD_MUTEX_INITIALIZER; -#define _ARC4_LOCK() pthread_mutex_lock(&_arc4_lock) -#define _ARC4_UNLOCK() pthread_mutex_unlock(&_arc4_lock) -/* BIONIC-END */ +#define explicit_bzero(p, s) memset(p, 0, s) + +#undef MAP_ANON +#define MAP_ANON (MAP_PRIVATE | MAP_ANONYMOUS) + +/* + * XXX Should be replaced with a proper entropy measure. + */ +static int +gotdata(u_char *buf, size_t len) +{ + char any_set = 0; + size_t i; + + for (i = 0; i < len; ++i) + any_set |= buf[i]; + if (any_set == 0) + return -1; + return 0; +} + +static int +getentropy/*_urandom*/(u_char *buf, size_t len) +{ + int save_errno = errno; + + int fd = TEMP_FAILURE_RETRY(open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOFOLLOW, 0)); + if (fd == -1) { + __libc_fatal("getentropy_urandom failed to open \"/dev/urandom\": %s", + strerror(errno)); + } + + /* Lightly verify that the device node looks sane */ + struct stat st; + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + __libc_fatal("getentropy_urandom failed to fstat \"/dev/urandom\": %s", + strerror(errno)); + } + int cnt; + if (ioctl(fd, RNDGETENTCNT, &cnt) == -1) { + __libc_fatal("getentropy_urandom failed to ioctl \"/dev/urandom\": %s", + strerror(errno)); + } + for (size_t i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = TEMP_FAILURE_RETRY(read(fd, buf + i, wanted)); + + if (ret == -1) { + __libc_fatal("getentropy_urandom failed to read \"/dev/urandom\": %s", + strerror(errno)); + } + i += ret; + } + close(fd); + if (gotdata(buf, len) == -1) { + __libc_fatal("getentropy_urandom failed to get enough entropy: %s", + strerror(errno)); + } + + errno = save_errno; + return 0; +} +#endif /* __ANDROID__ */ + +#define KEYSTREAM_ONLY +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include "../upstream-openbsd/lib/libc/crypt/chacha_private.h" +#pragma GCC diagnostic pop #ifdef __GNUC__ #define inline __inline -#else /* !__GNUC__ */ +#else /* !__GNUC__ */ #define inline -#endif /* !__GNUC__ */ - -struct arc4_stream { - u_int8_t i; - u_int8_t j; - u_int8_t s[256]; -}; +#endif /* !__GNUC__ */ +#define KEYSZ 32 +#define IVSZ 8 +#define BLOCKSZ 64 +#define RSBUFSZ (16*BLOCKSZ) static int rs_initialized; -static struct arc4_stream rs; -static pid_t arc4_stir_pid; -static int arc4_count; +static pid_t rs_stir_pid; +static chacha_ctx *rs; /* chacha context for random keystream */ +static u_char *rs_buf; /* keystream blocks */ +static size_t rs_have; /* valid bytes at end of rs_buf */ +static size_t rs_count; /* bytes till reseed */ -static inline u_int8_t arc4_getbyte(void); +static inline void _rs_rekey(u_char *dat, size_t datlen); static inline void -arc4_init(void) +_rs_init(u_char *buf, size_t n) { - int n; + if (n < KEYSZ + IVSZ) + return; - for (n = 0; n < 256; n++) - rs.s[n] = n; - rs.i = 0; - rs.j = 0; -} + if (rs == NULL && (rs = mmap(NULL, sizeof(*rs), PROT_READ|PROT_WRITE, + MAP_ANON, -1, 0)) == MAP_FAILED) + abort(); + if (rs_buf == NULL && (rs_buf = mmap(NULL, RSBUFSZ, PROT_READ|PROT_WRITE, + MAP_ANON, -1, 0)) == MAP_FAILED) + abort(); -static inline void -arc4_addrandom(u_char *dat, int datlen) -{ - int n; - u_int8_t si; - - rs.i--; - for (n = 0; n < 256; n++) { - rs.i = (rs.i + 1); - si = rs.s[rs.i]; - rs.j = (rs.j + si + dat[n % datlen]); - rs.s[rs.i] = rs.s[rs.j]; - rs.s[rs.j] = si; - } - rs.j = rs.i; + chacha_keysetup(rs, buf, KEYSZ * 8, 0); + chacha_ivsetup(rs, buf + KEYSZ); } static void -arc4_stir(void) +_rs_stir(void) { -#if 1 /* BIONIC-BEGIN */ - int i, fd; - union { - struct timeval tv; - u_int rnd[128 / sizeof(u_int)]; - } rdat; - int n; + u_char rnd[KEYSZ + IVSZ]; - if (!rs_initialized) { - arc4_init(); - rs_initialized = 1; - } + /* XXX */ + (void) getentropy(rnd, sizeof rnd); - fd = open("/dev/urandom", O_RDONLY); - if (fd != -1) { - read(fd, rdat.rnd, sizeof(rdat.rnd)); - close(fd); - } - else - { - /* fd < 0 ? Ah, what the heck. We'll just take - * whatever was on the stack. just add a little more - * time-based randomness though - */ - gettimeofday(&rdat.tv, NULL); - } + if (!rs_initialized) { + rs_initialized = 1; + _rs_init(rnd, sizeof(rnd)); + } else + _rs_rekey(rnd, sizeof(rnd)); + explicit_bzero(rnd, sizeof(rnd)); - arc4_stir_pid = getpid(); - arc4_addrandom((void *) &rdat, sizeof(rdat)); -#else /* BIONIC-END */ - int i, mib[2]; - size_t len; - u_char rnd[128]; + /* invalidate rs_buf */ + rs_have = 0; + memset(rs_buf, 0, RSBUFSZ); - if (!rs_initialized) { - arc4_init(); - rs_initialized = 1; - } + rs_count = 1600000; +} - mib[0] = CTL_KERN; - mib[1] = KERN_ARND; +static inline void +_rs_stir_if_needed(size_t len) +{ + pid_t pid = getpid(); - len = sizeof(rnd); - sysctl(mib, 2, rnd, &len, NULL, 0); + if (rs_count <= len || !rs_initialized || rs_stir_pid != pid) { + rs_stir_pid = pid; + _rs_stir(); + } else + rs_count -= len; +} - arc4_stir_pid = getpid(); - arc4_addrandom(rnd, sizeof(rnd)); +static inline void +_rs_rekey(u_char *dat, size_t datlen) +{ +#ifndef KEYSTREAM_ONLY + memset(rs_buf, 0,RSBUFSZ); #endif - /* - * Discard early keystream, as per recommendations in: - * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps - */ - for (i = 0; i < 256; i++) - (void)arc4_getbyte(); - arc4_count = 1600000; + /* fill rs_buf with the keystream */ + chacha_encrypt_bytes(rs, rs_buf, rs_buf, RSBUFSZ); + /* mix in optional user provided data */ + if (dat) { + size_t i, m; + + m = MIN(datlen, KEYSZ + IVSZ); + for (i = 0; i < m; i++) + rs_buf[i] ^= dat[i]; + } + /* immediately reinit for backtracking resistance */ + _rs_init(rs_buf, KEYSZ + IVSZ); + memset(rs_buf, 0, KEYSZ + IVSZ); + rs_have = RSBUFSZ - KEYSZ - IVSZ; } -static inline u_int8_t -arc4_getbyte(void) +static inline void +_rs_random_buf(void *_buf, size_t n) { - u_int8_t si, sj; + u_char *buf = (u_char *)_buf; + size_t m; - rs.i = (rs.i + 1); - si = rs.s[rs.i]; - rs.j = (rs.j + si); - sj = rs.s[rs.j]; - rs.s[rs.i] = sj; - rs.s[rs.j] = si; - return (rs.s[(si + sj) & 0xff]); + _rs_stir_if_needed(n); + while (n > 0) { + if (rs_have > 0) { + m = MIN(n, rs_have); + memcpy(buf, rs_buf + RSBUFSZ - rs_have, m); + memset(rs_buf + RSBUFSZ - rs_have, 0, m); + buf += m; + n -= m; + rs_have -= m; + } + if (rs_have == 0) + _rs_rekey(NULL, 0); + } } -static inline u_int32_t -arc4_getword(void) +static inline void +_rs_random_u32(u_int32_t *val) { - u_int32_t val; - val = arc4_getbyte() << 24; - val |= arc4_getbyte() << 16; - val |= arc4_getbyte() << 8; - val |= arc4_getbyte(); - return val; -} - -void -arc4random_stir(void) -{ - _ARC4_LOCK(); - arc4_stir(); - _ARC4_UNLOCK(); -} - -void -arc4random_addrandom(u_char *dat, int datlen) -{ - _ARC4_LOCK(); - if (!rs_initialized) - arc4_stir(); - arc4_addrandom(dat, datlen); - _ARC4_UNLOCK(); + _rs_stir_if_needed(sizeof(*val)); + if (rs_have < sizeof(*val)) + _rs_rekey(NULL, 0); + memcpy(val, rs_buf + RSBUFSZ - rs_have, sizeof(*val)); + memset(rs_buf + RSBUFSZ - rs_have, 0, sizeof(*val)); + rs_have -= sizeof(*val); } u_int32_t arc4random(void) { - u_int32_t val; - _ARC4_LOCK(); - arc4_count -= 4; - if (arc4_count <= 0 || !rs_initialized || arc4_stir_pid != getpid()) - arc4_stir(); - val = arc4_getword(); - _ARC4_UNLOCK(); - return val; + u_int32_t val; + + _ARC4_LOCK(); + _rs_random_u32(&val); + _ARC4_UNLOCK(); + return val; } void -arc4random_buf(void *_buf, size_t n) +arc4random_buf(void *buf, size_t n) { - u_char *buf = (u_char *)_buf; - _ARC4_LOCK(); - if (!rs_initialized || arc4_stir_pid != getpid()) - arc4_stir(); - while (n--) { - if (--arc4_count <= 0) - arc4_stir(); - buf[n] = arc4_getbyte(); - } - _ARC4_UNLOCK(); + _ARC4_LOCK(); + _rs_random_buf(buf, n); + _ARC4_UNLOCK(); } /* @@ -241,55 +264,25 @@ arc4random_buf(void *_buf, size_t n) u_int32_t arc4random_uniform(u_int32_t upper_bound) { - u_int32_t r, min; + u_int32_t r, min; - if (upper_bound < 2) - return 0; + if (upper_bound < 2) + return 0; -#if (ULONG_MAX > 0xffffffffUL) - min = 0x100000000UL % upper_bound; -#else - /* Calculate (2**32 % upper_bound) avoiding 64-bit math */ - if (upper_bound > 0x80000000) - min = 1 + ~upper_bound; /* 2**32 - upper_bound */ - else { - /* (2**32 - (x * 2)) % x == 2**32 % x when x <= 2**31 */ - min = ((0xffffffff - (upper_bound * 2)) + 1) % upper_bound; - } -#endif + /* 2**32 % x == (2**32 - x) % x */ + min = -upper_bound % upper_bound; - /* - * This could theoretically loop forever but each retry has - * p > 0.5 (worst case, usually far better) of selecting a - * number inside the range we need, so it should rarely need - * to re-roll. - */ - for (;;) { - r = arc4random(); - if (r >= min) - break; - } + /* + * This could theoretically loop forever but each retry has + * p > 0.5 (worst case, usually far better) of selecting a + * number inside the range we need, so it should rarely need + * to re-roll. + */ + for (;;) { + r = arc4random(); + if (r >= min) + break; + } - return r % upper_bound; + return r % upper_bound; } - -#if 0 -/*-------- Test code for i386 --------*/ -#include -#include -int -main(int argc, char **argv) -{ - const int iter = 1000000; - int i; - pctrval v; - - v = rdtsc(); - for (i = 0; i < iter; i++) - arc4random(); - v = rdtsc() - v; - v /= iter; - - printf("%qd cycles\n", v); -} -#endif diff --git a/libc/bionic/thread_atexit.cpp b/libc/bionic/thread_private.cpp similarity index 84% rename from libc/bionic/thread_atexit.cpp rename to libc/bionic/thread_private.cpp index 68c119dfc..1c04019c9 100644 --- a/libc/bionic/thread_atexit.cpp +++ b/libc/bionic/thread_private.cpp @@ -26,17 +26,13 @@ * SUCH DAMAGE. */ -/* some simple glue used to make the BSD atexit code happy */ - #include +#include "private/thread_private.h" + +// Some simple glue used to make BSD code thread-safe. static pthread_mutex_t g_atexit_lock = PTHREAD_MUTEX_INITIALIZER; -__BEGIN_DECLS -__LIBC_HIDDEN__ void _thread_atexit_lock(); -__LIBC_HIDDEN__ void _thread_atexit_unlock(); -__END_DECLS - void _thread_atexit_lock() { pthread_mutex_lock(&g_atexit_lock); } @@ -44,3 +40,13 @@ void _thread_atexit_lock() { void _thread_atexit_unlock() { pthread_mutex_unlock(&g_atexit_lock); } + +static pthread_mutex_t g_arc4_lock = PTHREAD_MUTEX_INITIALIZER; + +void _thread_arc4_lock() { + pthread_mutex_lock(&g_arc4_lock); +} + +void _thread_arc4_unlock() { + pthread_mutex_unlock(&g_arc4_lock); +} diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h index 266aa5e78..62b7a67b8 100644 --- a/libc/include/stdlib.h +++ b/libc/include/stdlib.h @@ -100,9 +100,10 @@ extern unsigned short *seed48(unsigned short*); extern double erand48(unsigned short xsubi[3]); extern double drand48(void); extern void srand48(long); -extern unsigned int arc4random(void); -extern void arc4random_stir(void); -extern void arc4random_addrandom(unsigned char *, int); + +unsigned int arc4random(void); +unsigned int arc4random_uniform(unsigned int); +void arc4random_buf(void*, size_t); #define RAND_MAX 0x7fffffff diff --git a/libc/private/thread_private.h b/libc/private/thread_private.h index f73118131..724808a69 100644 --- a/libc/private/thread_private.h +++ b/libc/private/thread_private.h @@ -7,6 +7,8 @@ #include +__BEGIN_DECLS + /* * This file defines the thread library interface to libc. Thread * libraries must implement the functions described here for proper @@ -31,10 +33,18 @@ struct __thread_private_tag_t { #define _THREAD_PRIVATE_MUTEX_UNLOCK(name) \ pthread_mutex_unlock( &__THREAD_NAME(name)._private_lock ) -void _thread_atexit_lock(void); -void _thread_atexit_unlock(void); +__LIBC_HIDDEN__ void _thread_atexit_lock(void); +__LIBC_HIDDEN__ void _thread_atexit_unlock(void); #define _ATEXIT_LOCK() _thread_atexit_lock() #define _ATEXIT_UNLOCK() _thread_atexit_unlock() +__LIBC_HIDDEN__ void _thread_arc4_lock(void); +__LIBC_HIDDEN__ void _thread_arc4_unlock(void); + +#define _ARC4_LOCK() _thread_arc4_lock() +#define _ARC4_UNLOCK() _thread_arc4_unlock() + +__END_DECLS + #endif /* _THREAD_PRIVATE_H_ */ diff --git a/libc/upstream-openbsd/lib/libc/crypt/chacha_private.h b/libc/upstream-openbsd/lib/libc/crypt/chacha_private.h new file mode 100644 index 000000000..7c3680fa6 --- /dev/null +++ b/libc/upstream-openbsd/lib/libc/crypt/chacha_private.h @@ -0,0 +1,222 @@ +/* +chacha-merged.c version 20080118 +D. J. Bernstein +Public domain. +*/ + +/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */ + +typedef unsigned char u8; +typedef unsigned int u32; + +typedef struct +{ + u32 input[16]; /* could be compressed */ +} chacha_ctx; + +#define U8C(v) (v##U) +#define U32C(v) (v##U) + +#define U8V(v) ((u8)(v) & U8C(0xFF)) +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0]) ) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#define ROTATE(v,c) (ROTL32(v,c)) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define QUARTERROUND(a,b,c,d) \ + a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ + a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); + +static const char sigma[16] = "expand 32-byte k"; +static const char tau[16] = "expand 16-byte k"; + +static void +chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits) +{ + const char *constants; + + x->input[4] = U8TO32_LITTLE(k + 0); + x->input[5] = U8TO32_LITTLE(k + 4); + x->input[6] = U8TO32_LITTLE(k + 8); + x->input[7] = U8TO32_LITTLE(k + 12); + if (kbits == 256) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbits == 128 */ + constants = tau; + } + x->input[8] = U8TO32_LITTLE(k + 0); + x->input[9] = U8TO32_LITTLE(k + 4); + x->input[10] = U8TO32_LITTLE(k + 8); + x->input[11] = U8TO32_LITTLE(k + 12); + x->input[0] = U8TO32_LITTLE(constants + 0); + x->input[1] = U8TO32_LITTLE(constants + 4); + x->input[2] = U8TO32_LITTLE(constants + 8); + x->input[3] = U8TO32_LITTLE(constants + 12); +} + +static void +chacha_ivsetup(chacha_ctx *x,const u8 *iv) +{ + x->input[12] = 0; + x->input[13] = 0; + x->input[14] = U8TO32_LITTLE(iv + 0); + x->input[15] = U8TO32_LITTLE(iv + 4); +} + +static void +chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) +{ + u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + u8 *ctarget = NULL; + u8 tmp[64]; + u_int i; + + if (!bytes) return; + + j0 = x->input[0]; + j1 = x->input[1]; + j2 = x->input[2]; + j3 = x->input[3]; + j4 = x->input[4]; + j5 = x->input[5]; + j6 = x->input[6]; + j7 = x->input[7]; + j8 = x->input[8]; + j9 = x->input[9]; + j10 = x->input[10]; + j11 = x->input[11]; + j12 = x->input[12]; + j13 = x->input[13]; + j14 = x->input[14]; + j15 = x->input[15]; + + for (;;) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) tmp[i] = m[i]; + m = tmp; + ctarget = c; + c = tmp; + } + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + for (i = 20;i > 0;i -= 2) { + QUARTERROUND( x0, x4, x8,x12) + QUARTERROUND( x1, x5, x9,x13) + QUARTERROUND( x2, x6,x10,x14) + QUARTERROUND( x3, x7,x11,x15) + QUARTERROUND( x0, x5,x10,x15) + QUARTERROUND( x1, x6,x11,x12) + QUARTERROUND( x2, x7, x8,x13) + QUARTERROUND( x3, x4, x9,x14) + } + x0 = PLUS(x0,j0); + x1 = PLUS(x1,j1); + x2 = PLUS(x2,j2); + x3 = PLUS(x3,j3); + x4 = PLUS(x4,j4); + x5 = PLUS(x5,j5); + x6 = PLUS(x6,j6); + x7 = PLUS(x7,j7); + x8 = PLUS(x8,j8); + x9 = PLUS(x9,j9); + x10 = PLUS(x10,j10); + x11 = PLUS(x11,j11); + x12 = PLUS(x12,j12); + x13 = PLUS(x13,j13); + x14 = PLUS(x14,j14); + x15 = PLUS(x15,j15); + +#ifndef KEYSTREAM_ONLY + x0 = XOR(x0,U8TO32_LITTLE(m + 0)); + x1 = XOR(x1,U8TO32_LITTLE(m + 4)); + x2 = XOR(x2,U8TO32_LITTLE(m + 8)); + x3 = XOR(x3,U8TO32_LITTLE(m + 12)); + x4 = XOR(x4,U8TO32_LITTLE(m + 16)); + x5 = XOR(x5,U8TO32_LITTLE(m + 20)); + x6 = XOR(x6,U8TO32_LITTLE(m + 24)); + x7 = XOR(x7,U8TO32_LITTLE(m + 28)); + x8 = XOR(x8,U8TO32_LITTLE(m + 32)); + x9 = XOR(x9,U8TO32_LITTLE(m + 36)); + x10 = XOR(x10,U8TO32_LITTLE(m + 40)); + x11 = XOR(x11,U8TO32_LITTLE(m + 44)); + x12 = XOR(x12,U8TO32_LITTLE(m + 48)); + x13 = XOR(x13,U8TO32_LITTLE(m + 52)); + x14 = XOR(x14,U8TO32_LITTLE(m + 56)); + x15 = XOR(x15,U8TO32_LITTLE(m + 60)); +#endif + + j12 = PLUSONE(j12); + if (!j12) { + j13 = PLUSONE(j13); + /* stopping at 2^70 bytes per nonce is user's responsibility */ + } + + U32TO8_LITTLE(c + 0,x0); + U32TO8_LITTLE(c + 4,x1); + U32TO8_LITTLE(c + 8,x2); + U32TO8_LITTLE(c + 12,x3); + U32TO8_LITTLE(c + 16,x4); + U32TO8_LITTLE(c + 20,x5); + U32TO8_LITTLE(c + 24,x6); + U32TO8_LITTLE(c + 28,x7); + U32TO8_LITTLE(c + 32,x8); + U32TO8_LITTLE(c + 36,x9); + U32TO8_LITTLE(c + 40,x10); + U32TO8_LITTLE(c + 44,x11); + U32TO8_LITTLE(c + 48,x12); + U32TO8_LITTLE(c + 52,x13); + U32TO8_LITTLE(c + 56,x14); + U32TO8_LITTLE(c + 60,x15); + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + } + x->input[12] = j12; + x->input[13] = j13; + return; + } + bytes -= 64; + c += 64; +#ifndef KEYSTREAM_ONLY + m += 64; +#endif + } +}