On contemporary platforms, assembly GHASH processes multiple blocks
faster than it processes them one at a time. For TLS payloads shorter
than 16 bytes, e.g. alerts, the hashing operation can therefore be
reduced to a single call, and for lengths not divisible by 16 the two
final calls can be folded into one. The improvement is most noticeable
with "retpoline", because the call to assembly GHASH is indirect.
Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6312)
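The idea behind the patch, shown as a standalone sketch: bytes destined
for GHASH are staged in a small scratch buffer and handed to the
multi-block routine in batched calls, instead of one multiplication per
block. This is an illustration under assumptions, not the patched
OpenSSL code: struct coalescer, ghash_multiblock_f, stage_byte() and
coalescer_finish() are hypothetical stand-ins for the gcm128 context
fields (Xn, mres) and the assembly gcm_ghash_* routines.

#include <stddef.h>
#include <string.h>

/* Hypothetical stand-in for an assembly multi-block GHASH routine. */
typedef void (*ghash_multiblock_f)(unsigned char Xi[16],
                                   const unsigned char *inp, size_t len);

struct coalescer {
    unsigned char Xi[16];     /* running GHASH accumulator */
    unsigned char Xn[48];     /* staged input, up to three 16-byte blocks */
    unsigned int mres;        /* bytes currently staged in Xn */
    ghash_multiblock_f ghash; /* indirect call: cheapest when batched */
};

/* Stage one byte; flush with a single call once the buffer fills. */
static void stage_byte(struct coalescer *c, unsigned char b)
{
    c->Xn[c->mres++] = b;
    if (c->mres == sizeof(c->Xn)) {
        c->ghash(c->Xi, c->Xn, sizeof(c->Xn));
        c->mres = 0;
    }
}

/*
 * Zero-pad the staged tail to a block boundary, which is how GHASH
 * treats a partial final block anyway, so the bulk blocks and the tail
 * are hashed by one call instead of two.
 */
static void coalescer_finish(struct coalescer *c)
{
    if (c->mres) {
        unsigned int blocks = (c->mres + 15) & ~15u;

        memset(c->Xn + c->mres, 0, blocks - c->mres);
        c->ghash(c->Xi, c->Xn, blocks);
        c->mres = 0;
    }
}

Sized at 48 bytes, such a buffer lets a short TLS record finish with one
indirect GHASH call covering the deferred AAD block, the padded payload
tail and the 16-byte length block, which is exactly where retpoline's
per-call overhead is amortized best.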
long one;
char little;
} is_endian = { 1 };
- unsigned int n, ctr;
+ unsigned int n, ctr, mres;
size_t i;
u64 mlen = ctx->len.u[1];
block128_f block = ctx->block;
return -1;
ctx->len.u[1] = mlen;
+ mres = ctx->mres;
+
if (ctx->ares) {
/* First call to encrypt finalizes GHASH(AAD) */
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (len == 0) {
+ GCM_MUL(ctx);
+ ctx->ares = 0;
+ return 0;
+ }
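+ /* defer GHASH(AAD): park the pending Xi block at the head of Xn and restart Xi at zero, so AAD and payload are hashed together in one batched call */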
+ memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ mres = sizeof(ctx->Xi);
+#else
GCM_MUL(ctx);
+#endif
ctx->ares = 0;
}
#if !defined(OPENSSL_SMALL_FOOTPRINT)
if (16 % sizeof(size_t) == 0) { /* always true actually */
do {
if (n) {
+# if defined(GHASH)
+ while (n && len) {
+ ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ } else {
+ ctx->mres = mres;
+ return 0;
+ }
+# else
while (n && len) {
ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
--len;
n = (n + 1) % 16;
}
ctx->mres = n;
return 0;
}
+# endif
}
# if defined(STRICT_ALIGNMENT)
if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
break;
# endif
# if defined(GHASH)
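+ /* flush staged bytes before the bulk loop so full chunks can be hashed in large batches */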
+ if (len >= 16 && mres) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
# if defined(GHASH_CHUNK)
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
# endif
else
ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+ while (len--) {
+ ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+ ++n;
+ }
+# else
while (len--) {
ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
++n;
}
+ mres = n;
+# endif
else
ctx->Yi.d[3] = ctr;
}
- ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
- n = (n + 1) % 16;
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
+ n = (n + 1) % 16;
+ if (mres == sizeof(ctx->Xn)) {
+ GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+ mres = 0;
+ }
+#else
+ ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+ mres = n = (n + 1) % 16;
if (n == 0)
GCM_MUL(ctx);
+#endif
long one;
char little;
} is_endian = { 1 };
- unsigned int n, ctr;
+ unsigned int n, ctr, mres;
size_t i;
u64 mlen = ctx->len.u[1];
block128_f block = ctx->block;
return -1;
ctx->len.u[1] = mlen;
+ mres = ctx->mres;
+
if (ctx->ares) {
/* First call to decrypt finalizes GHASH(AAD) */
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (len == 0) {
+ GCM_MUL(ctx);
+ ctx->ares = 0;
+ return 0;
+ }
+ memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ mres = sizeof(ctx->Xi);
+#else
GCM_MUL(ctx);
+#endif
ctx->ares = 0;
}
#if !defined(OPENSSL_SMALL_FOOTPRINT)
if (16 % sizeof(size_t) == 0) { /* always true actually */
do {
if (n) {
+# if defined(GHASH)
+ while (n && len) {
+ *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ } else {
+ ctx->mres = mres;
+ return 0;
+ }
+# else
while (n && len) {
u8 c = *(in++);
*(out++) = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
--len;
n = (n + 1) % 16;
}
ctx->mres = n;
return 0;
}
+# endif
}
# if defined(STRICT_ALIGNMENT)
if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
break;
# endif
# if defined(GHASH)
+ if (len >= 16 && mres) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
# if defined(GHASH_CHUNK)
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
# endif
else
ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+ while (len--) {
+ out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+ ++n;
+ }
+# else
while (len--) {
u8 c = in[n];
ctx->Xi.c[n] ^= c;
out[n] = c ^ ctx->EKi.c[n];
++n;
}
+ mres = n;
+# endif
else
ctx->Yi.d[3] = ctr;
}
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
+ n = (n + 1) % 16;
+ if (mres == sizeof(ctx->Xn)) {
+ GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+ mres = 0;
+ }
+#else
c = in[i];
out[i] = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
- n = (n + 1) % 16;
+ mres = n = (n + 1) % 16;
if (n == 0)
GCM_MUL(ctx);
+#endif
long one;
char little;
} is_endian = { 1 };
- unsigned int n, ctr;
+ unsigned int n, ctr, mres;
size_t i;
u64 mlen = ctx->len.u[1];
void *key = ctx->key;
return -1;
ctx->len.u[1] = mlen;
+ mres = ctx->mres;
+
if (ctx->ares) {
/* First call to encrypt finalizes GHASH(AAD) */
+#if defined(GHASH)
+ if (len == 0) {
+ GCM_MUL(ctx);
+ ctx->ares = 0;
+ return 0;
+ }
+ memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ mres = sizeof(ctx->Xi);
+#else
GCM_MUL(ctx);
+#endif
ctx->ares = 0;
}
+# if defined(GHASH)
+ while (n && len) {
+ ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ } else {
+ ctx->mres = mres;
+ return 0;
+ }
+# else
while (n && len) {
ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
--len;
n = (n + 1) % 16;
}
ctx->mres = n;
return 0;
}
+# endif
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+ if (len >= 16 && mres) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
+# if defined(GHASH_CHUNK)
while (len >= GHASH_CHUNK) {
(*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
ctr += GHASH_CHUNK / 16;
if (is_endian.little)
# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
# else
PUTU32(ctx->Yi.c + 12, ctr);
# endif
else
ctx->Yi.d[3] = ctr;
GHASH(ctx, out, GHASH_CHUNK);
out += GHASH_CHUNK;
in += GHASH_CHUNK;
len -= GHASH_CHUNK;
}
# endif
if ((i = (len & (size_t)-16))) {
size_t j = i / 16;
else
ctx->Yi.d[3] = ctr;
while (len--) {
- ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# if defined(GHASH)
+ ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+# else
+ ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# endif
++n;
}
long one;
char little;
} is_endian = { 1 };
- unsigned int n, ctr;
+ unsigned int n, ctr, mres;
size_t i;
u64 mlen = ctx->len.u[1];
void *key = ctx->key;
return -1;
ctx->len.u[1] = mlen;
+ mres = ctx->mres;
+
if (ctx->ares) {
/* First call to decrypt finalizes GHASH(AAD) */
+# if defined(GHASH)
+ if (len == 0) {
+ GCM_MUL(ctx);
+ ctx->ares = 0;
+ return 0;
+ }
+ memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ mres = sizeof(ctx->Xi);
+# else
GCM_MUL(ctx);
+# endif
ctx->ares = 0;
}
+# if defined(GHASH)
+ while (n && len) {
+ *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ } else {
+ ctx->mres = mres;
+ return 0;
+ }
+# else
while (n && len) {
u8 c = *(in++);
*(out++) = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
--len;
n = (n + 1) % 16;
}
ctx->mres = n;
return 0;
}
+# endif
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+ if (len >= 16 && mres) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
+# if defined(GHASH_CHUNK)
while (len >= GHASH_CHUNK) {
GHASH(ctx, in, GHASH_CHUNK);
(*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
ctr += GHASH_CHUNK / 16;
if (is_endian.little)
# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
# else
PUTU32(ctx->Yi.c + 12, ctr);
# endif
else
ctx->Yi.d[3] = ctr;
out += GHASH_CHUNK;
in += GHASH_CHUNK;
len -= GHASH_CHUNK;
}
# endif
if ((i = (len & (size_t)-16))) {
size_t j = i / 16;
else
ctx->Yi.d[3] = ctr;
while (len--) {
+# if defined(GHASH)
+ out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+# else
u8 c = in[n];
- ctx->Xi.c[n] ^= c;
+ ctx->Xi.c[mres++] ^= c;
out[n] = c ^ ctx->EKi.c[n];
+# endif
++n;
}
u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
+# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
+ const u8 *inp, size_t len) = ctx->ghash;
+# endif
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ u128 bitlen;
+ unsigned int mres = ctx->mres;
+
+ if (mres) {
+ unsigned blocks = (mres + 15) & -16;
+
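+ /* zero-pad the staged tail to a 16-byte boundary; GHASH of the padded buffer equals GHASH with a partial final block */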
+ memset(ctx->Xn + mres, 0, blocks - mres);
+ mres = blocks;
+ if (mres == sizeof(ctx->Xn)) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
+ } else if (ctx->ares) {
+ GCM_MUL(ctx);
+ }
+#else
if (ctx->mres || ctx->ares)
GCM_MUL(ctx);
+#endif
if (is_endian.little) {
#ifdef BSWAP8
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ bitlen.hi = alen;
+ bitlen.lo = clen;
+ memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
+ mres += sizeof(bitlen);
+ GHASH(ctx, ctx->Xn, mres);
+#else
ctx->Xi.u[0] ^= alen;
ctx->Xi.u[1] ^= clen;
GCM_MUL(ctx);
+#endif
ctx->Xi.u[0] ^= ctx->EK0.u[0];
ctx->Xi.u[1] ^= ctx->EK0.u[1];
unsigned int mres, ares;
block128_f block;
void *key;
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
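+ /* scratch buffer coalescing deferred GHASH input: up to three 16-byte blocks (pending AAD block, residual data, final length block) */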
+ unsigned char Xn[48];
+#endif
};
struct xts128_context {